qemu/hw/9pfs/virtio-9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "hw/virtio.h"
  15#include "hw/pc.h"
  16#include "qemu/sockets.h"
  17#include "hw/virtio-pci.h"
  18#include "virtio-9p.h"
  19#include "fsdev/qemu-fsdev.h"
  20#include "virtio-9p-xattr.h"
  21#include "virtio-9p-coth.h"
  22#include "trace.h"
  23#include "migration/migration.h"
  24
/*
 * File-descriptor accounting shared with the rest of the 9p server.
 * NOTE(review): open_fd_hw and total_open_fd are not referenced in this part
 * of the file; presumably they are a high-water mark and a running count of
 * host descriptors -- confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
/* Upper bound on how many fids v9fs_reclaim_fd() collects in one pass. */
static int open_fd_rc;
  28
  29enum {
  30    Oread   = 0x00,
  31    Owrite  = 0x01,
  32    Ordwr   = 0x02,
  33    Oexec   = 0x03,
  34    Oexcl   = 0x04,
  35    Otrunc  = 0x10,
  36    Orexec  = 0x20,
  37    Orclose = 0x40,
  38    Oappend = 0x80,
  39};
  40
  41static int omode_to_uflags(int8_t mode)
  42{
  43    int ret = 0;
  44
  45    switch (mode & 3) {
  46    case Oread:
  47        ret = O_RDONLY;
  48        break;
  49    case Ordwr:
  50        ret = O_RDWR;
  51        break;
  52    case Owrite:
  53        ret = O_WRONLY;
  54        break;
  55    case Oexec:
  56        ret = O_RDONLY;
  57        break;
  58    }
  59
  60    if (mode & Otrunc) {
  61        ret |= O_TRUNC;
  62    }
  63
  64    if (mode & Oappend) {
  65        ret |= O_APPEND;
  66    }
  67
  68    if (mode & Oexcl) {
  69        ret |= O_EXCL;
  70    }
  71
  72    return ret;
  73}
  74
/* One row of the translation table used by dotl_to_open_flags(). */
struct dotl_openflag_map {
    int dotl_flag;  /* flag bit as it appears in the client's request */
    int open_flag;  /* corresponding host open(2) flag */
};
  79
  80static int dotl_to_open_flags(int flags)
  81{
  82    int i;
  83    /*
  84     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
  85     * and P9_DOTL_NOACCESS
  86     */
  87    int oflags = flags & O_ACCMODE;
  88
  89    struct dotl_openflag_map dotl_oflag_map[] = {
  90        { P9_DOTL_CREATE, O_CREAT },
  91        { P9_DOTL_EXCL, O_EXCL },
  92        { P9_DOTL_NOCTTY , O_NOCTTY },
  93        { P9_DOTL_TRUNC, O_TRUNC },
  94        { P9_DOTL_APPEND, O_APPEND },
  95        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
  96        { P9_DOTL_DSYNC, O_DSYNC },
  97        { P9_DOTL_FASYNC, FASYNC },
  98        { P9_DOTL_DIRECT, O_DIRECT },
  99        { P9_DOTL_LARGEFILE, O_LARGEFILE },
 100        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 101        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 102        { P9_DOTL_NOATIME, O_NOATIME },
 103        { P9_DOTL_SYNC, O_SYNC },
 104    };
 105
 106    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 107        if (flags & dotl_oflag_map[i].dotl_flag) {
 108            oflags |= dotl_oflag_map[i].open_flag;
 109        }
 110    }
 111
 112    return oflags;
 113}
 114
 115void cred_init(FsCred *credp)
 116{
 117    credp->fc_uid = -1;
 118    credp->fc_gid = -1;
 119    credp->fc_mode = -1;
 120    credp->fc_rdev = -1;
 121}
 122
 123static int get_dotl_openflags(V9fsState *s, int oflags)
 124{
 125    int flags;
 126    /*
 127     * Filter the client open flags
 128     */
 129    flags = dotl_to_open_flags(oflags);
 130    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 131    /*
 132     * Ignore direct disk access hint until the server supports it.
 133     */
 134    flags &= ~O_DIRECT;
 135    return flags;
 136}
 137
 138void v9fs_path_init(V9fsPath *path)
 139{
 140    path->data = NULL;
 141    path->size = 0;
 142}
 143
 144void v9fs_path_free(V9fsPath *path)
 145{
 146    g_free(path->data);
 147    path->data = NULL;
 148    path->size = 0;
 149}
 150
 151void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs)
 152{
 153    v9fs_path_free(lhs);
 154    lhs->data = g_malloc(rhs->size);
 155    memcpy(lhs->data, rhs->data, rhs->size);
 156    lhs->size = rhs->size;
 157}
 158
 159int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 160                      const char *name, V9fsPath *path)
 161{
 162    int err;
 163    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 164    if (err < 0) {
 165        err = -errno;
 166    }
 167    return err;
 168}
 169
 170/*
 171 * Return TRUE if s1 is an ancestor of s2.
 172 *
 173 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 174 * As a special case, We treat s1 as ancestor of s2 if they are same!
 175 */
 176static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 177{
 178    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 179        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 180            return 1;
 181        }
 182    }
 183    return 0;
 184}
 185
 186static size_t v9fs_string_size(V9fsString *str)
 187{
 188    return str->size;
 189}
 190
 191/*
 192 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 193static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 194{
 195    int err = 1;
 196    if (f->fid_type == P9_FID_FILE) {
 197        if (f->fs.fd == -1) {
 198            do {
 199                err = v9fs_co_open(pdu, f, f->open_flags);
 200            } while (err == -EINTR && !pdu->cancelled);
 201        }
 202    } else if (f->fid_type == P9_FID_DIR) {
 203        if (f->fs.dir == NULL) {
 204            do {
 205                err = v9fs_co_opendir(pdu, f);
 206            } while (err == -EINTR && !pdu->cancelled);
 207        }
 208    }
 209    return err;
 210}
 211
 212static V9fsFidState *get_fid(V9fsPDU *pdu, int32_t fid)
 213{
 214    int err;
 215    V9fsFidState *f;
 216    V9fsState *s = pdu->s;
 217
 218    for (f = s->fid_list; f; f = f->next) {
 219        BUG_ON(f->clunked);
 220        if (f->fid == fid) {
 221            /*
 222             * Update the fid ref upfront so that
 223             * we don't get reclaimed when we yield
 224             * in open later.
 225             */
 226            f->ref++;
 227            /*
 228             * check whether we need to reopen the
 229             * file. We might have closed the fd
 230             * while trying to free up some file
 231             * descriptors.
 232             */
 233            err = v9fs_reopen_fid(pdu, f);
 234            if (err < 0) {
 235                f->ref--;
 236                return NULL;
 237            }
 238            /*
 239             * Mark the fid as referenced so that the LRU
 240             * reclaim won't close the file descriptor
 241             */
 242            f->flags |= FID_REFERENCED;
 243            return f;
 244        }
 245    }
 246    return NULL;
 247}
 248
 249static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 250{
 251    V9fsFidState *f;
 252
 253    for (f = s->fid_list; f; f = f->next) {
 254        /* If fid is already there return NULL */
 255        BUG_ON(f->clunked);
 256        if (f->fid == fid) {
 257            return NULL;
 258        }
 259    }
 260    f = g_malloc0(sizeof(V9fsFidState));
 261    f->fid = fid;
 262    f->fid_type = P9_FID_NONE;
 263    f->ref = 1;
 264    /*
 265     * Mark the fid as referenced so that the LRU
 266     * reclaim won't close the file descriptor
 267     */
 268    f->flags |= FID_REFERENCED;
 269    f->next = s->fid_list;
 270    s->fid_list = f;
 271
 272    return f;
 273}
 274
 275static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 276{
 277    int retval = 0;
 278
 279    if (fidp->fs.xattr.copied_len == -1) {
 280        /* getxattr/listxattr fid */
 281        goto free_value;
 282    }
 283    /*
 284     * if this is fid for setxattr. clunk should
 285     * result in setxattr localcall
 286     */
 287    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 288        /* clunk after partial write */
 289        retval = -EINVAL;
 290        goto free_out;
 291    }
 292    if (fidp->fs.xattr.len) {
 293        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 294                                   fidp->fs.xattr.value,
 295                                   fidp->fs.xattr.len,
 296                                   fidp->fs.xattr.flags);
 297    } else {
 298        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 299    }
 300free_out:
 301    v9fs_string_free(&fidp->fs.xattr.name);
 302free_value:
 303    if (fidp->fs.xattr.value) {
 304        g_free(fidp->fs.xattr.value);
 305    }
 306    return retval;
 307}
 308
 309static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 310{
 311    int retval = 0;
 312
 313    if (fidp->fid_type == P9_FID_FILE) {
 314        /* If we reclaimed the fd no need to close */
 315        if (fidp->fs.fd != -1) {
 316            retval = v9fs_co_close(pdu, &fidp->fs);
 317        }
 318    } else if (fidp->fid_type == P9_FID_DIR) {
 319        if (fidp->fs.dir != NULL) {
 320            retval = v9fs_co_closedir(pdu, &fidp->fs);
 321        }
 322    } else if (fidp->fid_type == P9_FID_XATTR) {
 323        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 324    }
 325    v9fs_path_free(&fidp->path);
 326    g_free(fidp);
 327    return retval;
 328}
 329
 330static int put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 331{
 332    BUG_ON(!fidp->ref);
 333    fidp->ref--;
 334    /*
 335     * Don't free the fid if it is in reclaim list
 336     */
 337    if (!fidp->ref && fidp->clunked) {
 338        if (fidp->fid == pdu->s->root_fid) {
 339            /*
 340             * if the clunked fid is root fid then we
 341             * have unmounted the fs on the client side.
 342             * delete the migration blocker. Ideally, this
 343             * should be hooked to transport close notification
 344             */
 345            if (pdu->s->migration_blocker) {
 346                migrate_del_blocker(pdu->s->migration_blocker);
 347                error_free(pdu->s->migration_blocker);
 348                pdu->s->migration_blocker = NULL;
 349            }
 350        }
 351        return free_fid(pdu, fidp);
 352    }
 353    return 0;
 354}
 355
 356static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 357{
 358    V9fsFidState **fidpp, *fidp;
 359
 360    for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
 361        if ((*fidpp)->fid == fid) {
 362            break;
 363        }
 364    }
 365    if (*fidpp == NULL) {
 366        return NULL;
 367    }
 368    fidp = *fidpp;
 369    *fidpp = fidp->next;
 370    fidp->clunked = 1;
 371    return fidp;
 372}
 373
/*
 * Close host file descriptors / directory handles of fids that have not been
 * used recently.
 *
 * Pass 1 walks the fid list and collects up to open_fd_rc candidates on a
 * private list linked through rclm_lst.  For each candidate the open handle
 * is moved from fs to fs_reclaim and fs is marked closed (fd = -1 or
 * dir = NULL), and a reference is taken.
 * Pass 2 closes the collected handles and drops the references again.
 */
void v9fs_reclaim_fd(V9fsPDU *pdu)
{
    int reclaim_count = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *f, *reclaim_list = NULL;

    for (f = s->fid_list; f; f = f->next) {
        /*
         * Unlink fids cannot be reclaimed. Check
         * for them and skip them. Also skip fids
         * currently being operated on.
         */
        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
            continue;
        }
        /*
         * if it is a recently referenced fid
         * we leave the fid untouched and clear the
         * reference bit. We come back to it later
         * in the next iteration. (a simple LRU without
         * moving list elements around)
         */
        if (f->flags & FID_REFERENCED) {
            f->flags &= ~FID_REFERENCED;
            continue;
        }
        /*
         * Add fids to reclaim list.
         */
        if (f->fid_type == P9_FID_FILE) {
            if (f->fs.fd != -1) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                f->rclm_lst = reclaim_list;
                reclaim_list = f;
                /* park the descriptor in fs_reclaim; fs now reads as closed */
                f->fs_reclaim.fd = f->fs.fd;
                f->fs.fd = -1;
                reclaim_count++;
            }
        } else if (f->fid_type == P9_FID_DIR) {
            if (f->fs.dir != NULL) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                f->rclm_lst = reclaim_list;
                reclaim_list = f;
                /* park the handle in fs_reclaim; fs now reads as closed */
                f->fs_reclaim.dir = f->fs.dir;
                f->fs.dir = NULL;
                reclaim_count++;
            }
        }
        /* stop once enough candidates have been collected */
        if (reclaim_count >= open_fd_rc) {
            break;
        }
    }
    /*
     * Now close the fid in reclaim list. Free them if they
     * are already clunked.
     */
    while (reclaim_list) {
        f = reclaim_list;
        reclaim_list = f->rclm_lst;
        if (f->fid_type == P9_FID_FILE) {
            v9fs_co_close(pdu, &f->fs_reclaim);
        } else if (f->fid_type == P9_FID_DIR) {
            v9fs_co_closedir(pdu, &f->fs_reclaim);
        }
        f->rclm_lst = NULL;
        /*
         * Now drop the fid reference, free it
         * if clunked.
         */
        put_fid(pdu, f);
    }
}
 454
 455static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 456{
 457    int err;
 458    V9fsState *s = pdu->s;
 459    V9fsFidState *fidp, head_fid;
 460
 461    head_fid.next = s->fid_list;
 462    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
 463        if (fidp->path.size != path->size) {
 464            continue;
 465        }
 466        if (!memcmp(fidp->path.data, path->data, path->size)) {
 467            /* Mark the fid non reclaimable. */
 468            fidp->flags |= FID_NON_RECLAIMABLE;
 469
 470            /* reopen the file/dir if already closed */
 471            err = v9fs_reopen_fid(pdu, fidp);
 472            if (err < 0) {
 473                return -1;
 474            }
 475            /*
 476             * Go back to head of fid list because
 477             * the list could have got updated when
 478             * switched to the worker thread
 479             */
 480            if (err == 0) {
 481                fidp = &head_fid;
 482            }
 483        }
 484    }
 485    return 0;
 486}
 487
 488static void virtfs_reset(V9fsPDU *pdu)
 489{
 490    V9fsState *s = pdu->s;
 491    V9fsFidState *fidp = NULL;
 492
 493    /* Free all fids */
 494    while (s->fid_list) {
 495        fidp = s->fid_list;
 496        s->fid_list = fidp->next;
 497
 498        if (fidp->ref) {
 499            fidp->clunked = 1;
 500        } else {
 501            free_fid(pdu, fidp);
 502        }
 503    }
 504    if (fidp) {
 505        /* One or more unclunked fids found... */
 506        error_report("9pfs:%s: One or more uncluncked fids "
 507                     "found during reset", __func__);
 508    }
 509}
 510
/* QID type bits set by stat_to_qid() */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/*
 * Bits of the 9P stat mode word.  The low 9 bits carry the permission bits;
 * v9mode_to_mode() and stat_to_v9mode() translate the bits below to and from
 * the host's st_mode.
 */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Mask of all mode bits that describe the file type */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
 535
 536/* This is the algorithm from ufs in spfs */
 537static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
 538{
 539    size_t size;
 540
 541    memset(&qidp->path, 0, sizeof(qidp->path));
 542    size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 543    memcpy(&qidp->path, &stbuf->st_ino, size);
 544    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 545    qidp->type = 0;
 546    if (S_ISDIR(stbuf->st_mode)) {
 547        qidp->type |= P9_QID_TYPE_DIR;
 548    }
 549    if (S_ISLNK(stbuf->st_mode)) {
 550        qidp->type |= P9_QID_TYPE_SYMLINK;
 551    }
 552}
 553
 554static int fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, V9fsQID *qidp)
 555{
 556    struct stat stbuf;
 557    int err;
 558
 559    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
 560    if (err < 0) {
 561        return err;
 562    }
 563    stat_to_qid(&stbuf, qidp);
 564    return 0;
 565}
 566
 567static V9fsPDU *alloc_pdu(V9fsState *s)
 568{
 569    V9fsPDU *pdu = NULL;
 570
 571    if (!QLIST_EMPTY(&s->free_list)) {
 572        pdu = QLIST_FIRST(&s->free_list);
 573        QLIST_REMOVE(pdu, next);
 574        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
 575    }
 576    return pdu;
 577}
 578
 579static void free_pdu(V9fsState *s, V9fsPDU *pdu)
 580{
 581    if (pdu) {
 582        /*
 583         * Cancelled pdu are added back to the freelist
 584         * by flush request .
 585         */
 586        if (!pdu->cancelled) {
 587            QLIST_REMOVE(pdu, next);
 588            QLIST_INSERT_HEAD(&s->free_list, pdu, next);
 589        }
 590    }
 591}
 592
/*
 * Finish a request: write the reply header, hand the reply to the guest and
 * recycle the PDU.
 *
 * 'len' is the total reply length on success.  A negative 'len' is an error
 * code: the reply body is then replaced by an error reply.  For protocol
 * versions other than 9P2000.L that is P9_RERROR carrying the strerror()
 * text followed by the numeric error; for 9P2000.L it is P9_RLERROR carrying
 * only the numeric error.
 *
 * We don't do error checking for pdu_marshal/unmarshal here
 * because we always expect to have enough space to encode
 * error details
 */
static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len)
{
    int8_t id = pdu->id + 1; /* Response */

    if (len < 0) {
        int err = -len;
        /* the error payload starts at offset 7, right after the header */
        len = 7;

        if (s->proto_version != V9FS_PROTO_2000L) {
            V9fsString str;

            /* str borrows strerror()'s buffer; it must not be freed */
            str.data = strerror(err);
            str.size = strlen(str.data);

            len += pdu_marshal(pdu, len, "s", &str);
            id = P9_RERROR;
        }

        len += pdu_marshal(pdu, len, "d", err);

        if (s->proto_version == V9FS_PROTO_2000L) {
            id = P9_RLERROR;
        }
        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
    }

    /* fill out the header */
    pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag);

    /* keep these in sync */
    pdu->size = len;
    pdu->id = id;

    /* push onto queue and notify */
    virtqueue_push(s->vq, &pdu->elem, len);

    /* FIXME: we should batch these completions */
    virtio_notify(&s->vdev, s->vq);

    /* Now wakeup anybody waiting in flush for this request */
    qemu_co_queue_next(&pdu->complete);

    free_pdu(s, pdu);
}
 642
 643static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
 644{
 645    mode_t ret;
 646
 647    ret = mode & 0777;
 648    if (mode & P9_STAT_MODE_DIR) {
 649        ret |= S_IFDIR;
 650    }
 651
 652    if (mode & P9_STAT_MODE_SYMLINK) {
 653        ret |= S_IFLNK;
 654    }
 655    if (mode & P9_STAT_MODE_SOCKET) {
 656        ret |= S_IFSOCK;
 657    }
 658    if (mode & P9_STAT_MODE_NAMED_PIPE) {
 659        ret |= S_IFIFO;
 660    }
 661    if (mode & P9_STAT_MODE_DEVICE) {
 662        if (extension && extension->data[0] == 'c') {
 663            ret |= S_IFCHR;
 664        } else {
 665            ret |= S_IFBLK;
 666        }
 667    }
 668
 669    if (!(ret&~0777)) {
 670        ret |= S_IFREG;
 671    }
 672
 673    if (mode & P9_STAT_MODE_SETUID) {
 674        ret |= S_ISUID;
 675    }
 676    if (mode & P9_STAT_MODE_SETGID) {
 677        ret |= S_ISGID;
 678    }
 679    if (mode & P9_STAT_MODE_SETVTX) {
 680        ret |= S_ISVTX;
 681    }
 682
 683    return ret;
 684}
 685
 686static int donttouch_stat(V9fsStat *stat)
 687{
 688    if (stat->type == -1 &&
 689        stat->dev == -1 &&
 690        stat->qid.type == -1 &&
 691        stat->qid.version == -1 &&
 692        stat->qid.path == -1 &&
 693        stat->mode == -1 &&
 694        stat->atime == -1 &&
 695        stat->mtime == -1 &&
 696        stat->length == -1 &&
 697        !stat->name.size &&
 698        !stat->uid.size &&
 699        !stat->gid.size &&
 700        !stat->muid.size &&
 701        stat->n_uid == -1 &&
 702        stat->n_gid == -1 &&
 703        stat->n_muid == -1) {
 704        return 1;
 705    }
 706
 707    return 0;
 708}
 709
 710static void v9fs_stat_init(V9fsStat *stat)
 711{
 712    v9fs_string_init(&stat->name);
 713    v9fs_string_init(&stat->uid);
 714    v9fs_string_init(&stat->gid);
 715    v9fs_string_init(&stat->muid);
 716    v9fs_string_init(&stat->extension);
 717}
 718
 719static void v9fs_stat_free(V9fsStat *stat)
 720{
 721    v9fs_string_free(&stat->name);
 722    v9fs_string_free(&stat->uid);
 723    v9fs_string_free(&stat->gid);
 724    v9fs_string_free(&stat->muid);
 725    v9fs_string_free(&stat->extension);
 726}
 727
 728static uint32_t stat_to_v9mode(const struct stat *stbuf)
 729{
 730    uint32_t mode;
 731
 732    mode = stbuf->st_mode & 0777;
 733    if (S_ISDIR(stbuf->st_mode)) {
 734        mode |= P9_STAT_MODE_DIR;
 735    }
 736
 737    if (S_ISLNK(stbuf->st_mode)) {
 738        mode |= P9_STAT_MODE_SYMLINK;
 739    }
 740
 741    if (S_ISSOCK(stbuf->st_mode)) {
 742        mode |= P9_STAT_MODE_SOCKET;
 743    }
 744
 745    if (S_ISFIFO(stbuf->st_mode)) {
 746        mode |= P9_STAT_MODE_NAMED_PIPE;
 747    }
 748
 749    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
 750        mode |= P9_STAT_MODE_DEVICE;
 751    }
 752
 753    if (stbuf->st_mode & S_ISUID) {
 754        mode |= P9_STAT_MODE_SETUID;
 755    }
 756
 757    if (stbuf->st_mode & S_ISGID) {
 758        mode |= P9_STAT_MODE_SETGID;
 759    }
 760
 761    if (stbuf->st_mode & S_ISVTX) {
 762        mode |= P9_STAT_MODE_SETVTX;
 763    }
 764
 765    return mode;
 766}
 767
 768static int stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name,
 769                            const struct stat *stbuf,
 770                            V9fsStat *v9stat)
 771{
 772    int err;
 773    const char *str;
 774
 775    memset(v9stat, 0, sizeof(*v9stat));
 776
 777    stat_to_qid(stbuf, &v9stat->qid);
 778    v9stat->mode = stat_to_v9mode(stbuf);
 779    v9stat->atime = stbuf->st_atime;
 780    v9stat->mtime = stbuf->st_mtime;
 781    v9stat->length = stbuf->st_size;
 782
 783    v9fs_string_null(&v9stat->uid);
 784    v9fs_string_null(&v9stat->gid);
 785    v9fs_string_null(&v9stat->muid);
 786
 787    v9stat->n_uid = stbuf->st_uid;
 788    v9stat->n_gid = stbuf->st_gid;
 789    v9stat->n_muid = 0;
 790
 791    v9fs_string_null(&v9stat->extension);
 792
 793    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
 794        err = v9fs_co_readlink(pdu, name, &v9stat->extension);
 795        if (err < 0) {
 796            return err;
 797        }
 798    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
 799        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
 800                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
 801                major(stbuf->st_rdev), minor(stbuf->st_rdev));
 802    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
 803        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
 804                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
 805    }
 806
 807    str = strrchr(name->data, '/');
 808    if (str) {
 809        str += 1;
 810    } else {
 811        str = name->data;
 812    }
 813
 814    v9fs_string_sprintf(&v9stat->name, "%s", str);
 815
 816    v9stat->size = 61 +
 817        v9fs_string_size(&v9stat->name) +
 818        v9fs_string_size(&v9stat->uid) +
 819        v9fs_string_size(&v9stat->gid) +
 820        v9fs_string_size(&v9stat->muid) +
 821        v9fs_string_size(&v9stat->extension);
 822    return 0;
 823}
 824
/*
 * Bits of the getattr request mask and of st_result_mask in the reply; each
 * bit names one attribute field.
 */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

/* Fields beyond the basic set */
#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
 843
 844
 845static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
 846                                V9fsStatDotl *v9lstat)
 847{
 848    memset(v9lstat, 0, sizeof(*v9lstat));
 849
 850    v9lstat->st_mode = stbuf->st_mode;
 851    v9lstat->st_nlink = stbuf->st_nlink;
 852    v9lstat->st_uid = stbuf->st_uid;
 853    v9lstat->st_gid = stbuf->st_gid;
 854    v9lstat->st_rdev = stbuf->st_rdev;
 855    v9lstat->st_size = stbuf->st_size;
 856    v9lstat->st_blksize = stbuf->st_blksize;
 857    v9lstat->st_blocks = stbuf->st_blocks;
 858    v9lstat->st_atime_sec = stbuf->st_atime;
 859    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
 860    v9lstat->st_mtime_sec = stbuf->st_mtime;
 861    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 862    v9lstat->st_ctime_sec = stbuf->st_ctime;
 863    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
 864    /* Currently we only support BASIC fields in stat */
 865    v9lstat->st_result_mask = P9_STATS_BASIC;
 866
 867    stat_to_qid(stbuf, &v9lstat->qid);
 868}
 869
 870static void print_sg(struct iovec *sg, int cnt)
 871{
 872    int i;
 873
 874    printf("sg[%d]: {", cnt);
 875    for (i = 0; i < cnt; i++) {
 876        if (i) {
 877            printf(", ");
 878        }
 879        printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len);
 880    }
 881    printf("}\n");
 882}
 883
/*
 * Replace the first 'len' bytes of the path in 'dst' with the path in 'src':
 * dst becomes src->data followed by whatever followed offset 'len' in the
 * old dst.
 *
 * Will call this only for path name based fid
 *
 * NOTE(review): dst is passed to v9fs_string_sprintf() through a cast to
 * V9fsString *, which presumably relies on V9fsPath and V9fsString sharing
 * the same layout -- confirm against their declarations.
 */
static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
{
    V9fsPath str;
    v9fs_path_init(&str);
    /* keep the old contents of dst; the sprintf below overwrites dst */
    v9fs_path_copy(&str, dst);
    v9fs_string_sprintf((V9fsString *)dst, "%s%s", src->data, str.data+len);
    v9fs_path_free(&str);
    /* +1 to include terminating NULL */
    dst->size++;
}
 895
 896static inline bool is_ro_export(FsContext *ctx)
 897{
 898    return ctx->export_flags & V9FS_RDONLY;
 899}
 900
 901static void v9fs_version(void *opaque)
 902{
 903    ssize_t err;
 904    V9fsPDU *pdu = opaque;
 905    V9fsState *s = pdu->s;
 906    V9fsString version;
 907    size_t offset = 7;
 908
 909    v9fs_string_init(&version);
 910    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
 911    if (err < 0) {
 912        offset = err;
 913        goto out;
 914    }
 915    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
 916
 917    virtfs_reset(pdu);
 918
 919    if (!strcmp(version.data, "9P2000.u")) {
 920        s->proto_version = V9FS_PROTO_2000U;
 921    } else if (!strcmp(version.data, "9P2000.L")) {
 922        s->proto_version = V9FS_PROTO_2000L;
 923    } else {
 924        v9fs_string_sprintf(&version, "unknown");
 925    }
 926
 927    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
 928    if (err < 0) {
 929        offset = err;
 930        goto out;
 931    }
 932    offset += err;
 933    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
 934out:
 935    complete_pdu(s, pdu, offset);
 936    v9fs_string_free(&version);
 937}
 938
 939static void v9fs_attach(void *opaque)
 940{
 941    V9fsPDU *pdu = opaque;
 942    V9fsState *s = pdu->s;
 943    int32_t fid, afid, n_uname;
 944    V9fsString uname, aname;
 945    V9fsFidState *fidp;
 946    size_t offset = 7;
 947    V9fsQID qid;
 948    ssize_t err;
 949
 950    v9fs_string_init(&uname);
 951    v9fs_string_init(&aname);
 952    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
 953                        &afid, &uname, &aname, &n_uname);
 954    if (err < 0) {
 955        goto out_nofid;
 956    }
 957    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
 958
 959    fidp = alloc_fid(s, fid);
 960    if (fidp == NULL) {
 961        err = -EINVAL;
 962        goto out_nofid;
 963    }
 964    fidp->uid = n_uname;
 965    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
 966    if (err < 0) {
 967        err = -EINVAL;
 968        clunk_fid(s, fid);
 969        goto out;
 970    }
 971    err = fid_to_qid(pdu, fidp, &qid);
 972    if (err < 0) {
 973        err = -EINVAL;
 974        clunk_fid(s, fid);
 975        goto out;
 976    }
 977    err = pdu_marshal(pdu, offset, "Q", &qid);
 978    if (err < 0) {
 979        clunk_fid(s, fid);
 980        goto out;
 981    }
 982    err += offset;
 983    trace_v9fs_attach_return(pdu->tag, pdu->id,
 984                             qid.type, qid.version, qid.path);
 985    /*
 986     * disable migration if we haven't done already.
 987     * attach could get called multiple times for the same export.
 988     */
 989    if (!s->migration_blocker) {
 990        s->root_fid = fid;
 991        error_set(&s->migration_blocker, QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION,
 992                  s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
 993        migrate_add_blocker(s->migration_blocker);
 994    }
 995out:
 996    put_fid(pdu, fidp);
 997out_nofid:
 998    complete_pdu(s, pdu, err);
 999    v9fs_string_free(&uname);
1000    v9fs_string_free(&aname);
1001}
1002
1003static void v9fs_stat(void *opaque)
1004{
1005    int32_t fid;
1006    V9fsStat v9stat;
1007    ssize_t err = 0;
1008    size_t offset = 7;
1009    struct stat stbuf;
1010    V9fsFidState *fidp;
1011    V9fsPDU *pdu = opaque;
1012    V9fsState *s = pdu->s;
1013
1014    err = pdu_unmarshal(pdu, offset, "d", &fid);
1015    if (err < 0) {
1016        goto out_nofid;
1017    }
1018    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1019
1020    fidp = get_fid(pdu, fid);
1021    if (fidp == NULL) {
1022        err = -ENOENT;
1023        goto out_nofid;
1024    }
1025    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1026    if (err < 0) {
1027        goto out;
1028    }
1029    err = stat_to_v9stat(pdu, &fidp->path, &stbuf, &v9stat);
1030    if (err < 0) {
1031        goto out;
1032    }
1033    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1034    if (err < 0) {
1035        v9fs_stat_free(&v9stat);
1036        goto out;
1037    }
1038    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1039                           v9stat.atime, v9stat.mtime, v9stat.length);
1040    err += offset;
1041    v9fs_stat_free(&v9stat);
1042out:
1043    put_fid(pdu, fidp);
1044out_nofid:
1045    complete_pdu(s, pdu, err);
1046}
1047
1048static void v9fs_getattr(void *opaque)
1049{
1050    int32_t fid;
1051    size_t offset = 7;
1052    ssize_t retval = 0;
1053    struct stat stbuf;
1054    V9fsFidState *fidp;
1055    uint64_t request_mask;
1056    V9fsStatDotl v9stat_dotl;
1057    V9fsPDU *pdu = opaque;
1058    V9fsState *s = pdu->s;
1059
1060    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1061    if (retval < 0) {
1062        goto out_nofid;
1063    }
1064    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1065
1066    fidp = get_fid(pdu, fid);
1067    if (fidp == NULL) {
1068        retval = -ENOENT;
1069        goto out_nofid;
1070    }
1071    /*
1072     * Currently we only support BASIC fields in stat, so there is no
1073     * need to look at request_mask.
1074     */
1075    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1076    if (retval < 0) {
1077        goto out;
1078    }
1079    stat_to_v9stat_dotl(s, &stbuf, &v9stat_dotl);
1080
1081    /*  fill st_gen if requested and supported by underlying fs */
1082    if (request_mask & P9_STATS_GEN) {
1083        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1084        if (retval < 0) {
1085            goto out;
1086        }
1087        v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1088    }
1089    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1090    if (retval < 0) {
1091        goto out;
1092    }
1093    retval += offset;
1094    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1095                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1096                              v9stat_dotl.st_gid);
1097out:
1098    put_fid(pdu, fidp);
1099out_nofid:
1100    complete_pdu(s, pdu, retval);
1101}
1102
/*
 * Bits of the "valid" field of a setattr request.  Each bit tells
 * v9fs_setattr() which member of the incoming V9fsIattr to apply.
 */
#define P9_ATTR_MODE       0x001
#define P9_ATTR_UID        0x002
#define P9_ATTR_GID        0x004
#define P9_ATTR_SIZE       0x008
#define P9_ATTR_ATIME      0x010
#define P9_ATTR_MTIME      0x020
#define P9_ATTR_CTIME      0x040
#define P9_ATTR_ATIME_SET  0x080
#define P9_ATTR_MTIME_SET  0x100

/* The seven low bits (MODE through CTIME); the *_SET modifiers are excluded. */
#define P9_ATTR_MASK    (P9_ATTR_MODE | P9_ATTR_UID | P9_ATTR_GID | \
                         P9_ATTR_SIZE | P9_ATTR_ATIME | P9_ATTR_MTIME | \
                         P9_ATTR_CTIME)
1115
1116static void v9fs_setattr(void *opaque)
1117{
1118    int err = 0;
1119    int32_t fid;
1120    V9fsFidState *fidp;
1121    size_t offset = 7;
1122    V9fsIattr v9iattr;
1123    V9fsPDU *pdu = opaque;
1124    V9fsState *s = pdu->s;
1125
1126    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1127    if (err < 0) {
1128        goto out_nofid;
1129    }
1130
1131    fidp = get_fid(pdu, fid);
1132    if (fidp == NULL) {
1133        err = -EINVAL;
1134        goto out_nofid;
1135    }
1136    if (v9iattr.valid & P9_ATTR_MODE) {
1137        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1138        if (err < 0) {
1139            goto out;
1140        }
1141    }
1142    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1143        struct timespec times[2];
1144        if (v9iattr.valid & P9_ATTR_ATIME) {
1145            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1146                times[0].tv_sec = v9iattr.atime_sec;
1147                times[0].tv_nsec = v9iattr.atime_nsec;
1148            } else {
1149                times[0].tv_nsec = UTIME_NOW;
1150            }
1151        } else {
1152            times[0].tv_nsec = UTIME_OMIT;
1153        }
1154        if (v9iattr.valid & P9_ATTR_MTIME) {
1155            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1156                times[1].tv_sec = v9iattr.mtime_sec;
1157                times[1].tv_nsec = v9iattr.mtime_nsec;
1158            } else {
1159                times[1].tv_nsec = UTIME_NOW;
1160            }
1161        } else {
1162            times[1].tv_nsec = UTIME_OMIT;
1163        }
1164        err = v9fs_co_utimensat(pdu, &fidp->path, times);
1165        if (err < 0) {
1166            goto out;
1167        }
1168    }
1169    /*
1170     * If the only valid entry in iattr is ctime we can call
1171     * chown(-1,-1) to update the ctime of the file
1172     */
1173    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1174        ((v9iattr.valid & P9_ATTR_CTIME)
1175         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1176        if (!(v9iattr.valid & P9_ATTR_UID)) {
1177            v9iattr.uid = -1;
1178        }
1179        if (!(v9iattr.valid & P9_ATTR_GID)) {
1180            v9iattr.gid = -1;
1181        }
1182        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1183                            v9iattr.gid);
1184        if (err < 0) {
1185            goto out;
1186        }
1187    }
1188    if (v9iattr.valid & (P9_ATTR_SIZE)) {
1189        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1190        if (err < 0) {
1191            goto out;
1192        }
1193    }
1194    err = offset;
1195out:
1196    put_fid(pdu, fidp);
1197out_nofid:
1198    complete_pdu(s, pdu, err);
1199}
1200
1201static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1202{
1203    int i;
1204    ssize_t err;
1205    size_t offset = 7;
1206
1207    err = pdu_marshal(pdu, offset, "w", nwnames);
1208    if (err < 0) {
1209        return err;
1210    }
1211    offset += err;
1212    for (i = 0; i < nwnames; i++) {
1213        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1214        if (err < 0) {
1215            return err;
1216        }
1217        offset += err;
1218    }
1219    return offset;
1220}
1221
1222static void v9fs_walk(void *opaque)
1223{
1224    int name_idx;
1225    V9fsQID *qids = NULL;
1226    int i, err = 0;
1227    V9fsPath dpath, path;
1228    uint16_t nwnames;
1229    struct stat stbuf;
1230    size_t offset = 7;
1231    int32_t fid, newfid;
1232    V9fsString *wnames = NULL;
1233    V9fsFidState *fidp;
1234    V9fsFidState *newfidp = NULL;
1235    V9fsPDU *pdu = opaque;
1236    V9fsState *s = pdu->s;
1237
1238    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1239    if (err < 0) {
1240        complete_pdu(s, pdu, err);
1241        return ;
1242    }
1243    offset += err;
1244
1245    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1246
1247    if (nwnames && nwnames <= P9_MAXWELEM) {
1248        wnames = g_malloc0(sizeof(wnames[0]) * nwnames);
1249        qids   = g_malloc0(sizeof(qids[0]) * nwnames);
1250        for (i = 0; i < nwnames; i++) {
1251            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1252            if (err < 0) {
1253                goto out_nofid;
1254            }
1255            offset += err;
1256        }
1257    } else if (nwnames > P9_MAXWELEM) {
1258        err = -EINVAL;
1259        goto out_nofid;
1260    }
1261    fidp = get_fid(pdu, fid);
1262    if (fidp == NULL) {
1263        err = -ENOENT;
1264        goto out_nofid;
1265    }
1266    v9fs_path_init(&dpath);
1267    v9fs_path_init(&path);
1268    /*
1269     * Both dpath and path initially poin to fidp.
1270     * Needed to handle request with nwnames == 0
1271     */
1272    v9fs_path_copy(&dpath, &fidp->path);
1273    v9fs_path_copy(&path, &fidp->path);
1274    for (name_idx = 0; name_idx < nwnames; name_idx++) {
1275        err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, &path);
1276        if (err < 0) {
1277            goto out;
1278        }
1279        err = v9fs_co_lstat(pdu, &path, &stbuf);
1280        if (err < 0) {
1281            goto out;
1282        }
1283        stat_to_qid(&stbuf, &qids[name_idx]);
1284        v9fs_path_copy(&dpath, &path);
1285    }
1286    if (fid == newfid) {
1287        BUG_ON(fidp->fid_type != P9_FID_NONE);
1288        v9fs_path_copy(&fidp->path, &path);
1289    } else {
1290        newfidp = alloc_fid(s, newfid);
1291        if (newfidp == NULL) {
1292            err = -EINVAL;
1293            goto out;
1294        }
1295        newfidp->uid = fidp->uid;
1296        v9fs_path_copy(&newfidp->path, &path);
1297    }
1298    err = v9fs_walk_marshal(pdu, nwnames, qids);
1299    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1300out:
1301    put_fid(pdu, fidp);
1302    if (newfidp) {
1303        put_fid(pdu, newfidp);
1304    }
1305    v9fs_path_free(&dpath);
1306    v9fs_path_free(&path);
1307out_nofid:
1308    complete_pdu(s, pdu, err);
1309    if (nwnames && nwnames <= P9_MAXWELEM) {
1310        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1311            v9fs_string_free(&wnames[name_idx]);
1312        }
1313        g_free(wnames);
1314        g_free(qids);
1315    }
1316}
1317
1318static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
1319{
1320    struct statfs stbuf;
1321    int32_t iounit = 0;
1322    V9fsState *s = pdu->s;
1323
1324    /*
1325     * iounit should be multiples of f_bsize (host filesystem block size
1326     * and as well as less than (client msize - P9_IOHDRSZ))
1327     */
1328    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1329        iounit = stbuf.f_bsize;
1330        iounit *= (s->msize - P9_IOHDRSZ)/stbuf.f_bsize;
1331    }
1332    if (!iounit) {
1333        iounit = s->msize - P9_IOHDRSZ;
1334    }
1335    return iounit;
1336}
1337
1338static void v9fs_open(void *opaque)
1339{
1340    int flags;
1341    int32_t fid;
1342    int32_t mode;
1343    V9fsQID qid;
1344    int iounit = 0;
1345    ssize_t err = 0;
1346    size_t offset = 7;
1347    struct stat stbuf;
1348    V9fsFidState *fidp;
1349    V9fsPDU *pdu = opaque;
1350    V9fsState *s = pdu->s;
1351
1352    if (s->proto_version == V9FS_PROTO_2000L) {
1353        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1354    } else {
1355        uint8_t modebyte;
1356        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1357        mode = modebyte;
1358    }
1359    if (err < 0) {
1360        goto out_nofid;
1361    }
1362    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1363
1364    fidp = get_fid(pdu, fid);
1365    if (fidp == NULL) {
1366        err = -ENOENT;
1367        goto out_nofid;
1368    }
1369    BUG_ON(fidp->fid_type != P9_FID_NONE);
1370
1371    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1372    if (err < 0) {
1373        goto out;
1374    }
1375    stat_to_qid(&stbuf, &qid);
1376    if (S_ISDIR(stbuf.st_mode)) {
1377        err = v9fs_co_opendir(pdu, fidp);
1378        if (err < 0) {
1379            goto out;
1380        }
1381        fidp->fid_type = P9_FID_DIR;
1382        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1383        if (err < 0) {
1384            goto out;
1385        }
1386        err += offset;
1387    } else {
1388        if (s->proto_version == V9FS_PROTO_2000L) {
1389            flags = get_dotl_openflags(s, mode);
1390        } else {
1391            flags = omode_to_uflags(mode);
1392        }
1393        if (is_ro_export(&s->ctx)) {
1394            if (mode & O_WRONLY || mode & O_RDWR ||
1395                mode & O_APPEND || mode & O_TRUNC) {
1396                err = -EROFS;
1397                goto out;
1398            }
1399        }
1400        err = v9fs_co_open(pdu, fidp, flags);
1401        if (err < 0) {
1402            goto out;
1403        }
1404        fidp->fid_type = P9_FID_FILE;
1405        fidp->open_flags = flags;
1406        if (flags & O_EXCL) {
1407            /*
1408             * We let the host file system do O_EXCL check
1409             * We should not reclaim such fd
1410             */
1411            fidp->flags |= FID_NON_RECLAIMABLE;
1412        }
1413        iounit = get_iounit(pdu, &fidp->path);
1414        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1415        if (err < 0) {
1416            goto out;
1417        }
1418        err += offset;
1419    }
1420    trace_v9fs_open_return(pdu->tag, pdu->id,
1421                           qid.type, qid.version, qid.path, iounit);
1422out:
1423    put_fid(pdu, fidp);
1424out_nofid:
1425    complete_pdu(s, pdu, err);
1426}
1427
1428static void v9fs_lcreate(void *opaque)
1429{
1430    int32_t dfid, flags, mode;
1431    gid_t gid;
1432    ssize_t err = 0;
1433    ssize_t offset = 7;
1434    V9fsString name;
1435    V9fsFidState *fidp;
1436    struct stat stbuf;
1437    V9fsQID qid;
1438    int32_t iounit;
1439    V9fsPDU *pdu = opaque;
1440
1441    v9fs_string_init(&name);
1442    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1443                        &name, &flags, &mode, &gid);
1444    if (err < 0) {
1445        goto out_nofid;
1446    }
1447    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1448
1449    fidp = get_fid(pdu, dfid);
1450    if (fidp == NULL) {
1451        err = -ENOENT;
1452        goto out_nofid;
1453    }
1454
1455    flags = get_dotl_openflags(pdu->s, flags);
1456    err = v9fs_co_open2(pdu, fidp, &name, gid,
1457                        flags | O_CREAT, mode, &stbuf);
1458    if (err < 0) {
1459        goto out;
1460    }
1461    fidp->fid_type = P9_FID_FILE;
1462    fidp->open_flags = flags;
1463    if (flags & O_EXCL) {
1464        /*
1465         * We let the host file system do O_EXCL check
1466         * We should not reclaim such fd
1467         */
1468        fidp->flags |= FID_NON_RECLAIMABLE;
1469    }
1470    iounit =  get_iounit(pdu, &fidp->path);
1471    stat_to_qid(&stbuf, &qid);
1472    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1473    if (err < 0) {
1474        goto out;
1475    }
1476    err += offset;
1477    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
1478                              qid.type, qid.version, qid.path, iounit);
1479out:
1480    put_fid(pdu, fidp);
1481out_nofid:
1482    complete_pdu(pdu->s, pdu, err);
1483    v9fs_string_free(&name);
1484}
1485
1486static void v9fs_fsync(void *opaque)
1487{
1488    int err;
1489    int32_t fid;
1490    int datasync;
1491    size_t offset = 7;
1492    V9fsFidState *fidp;
1493    V9fsPDU *pdu = opaque;
1494    V9fsState *s = pdu->s;
1495
1496    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
1497    if (err < 0) {
1498        goto out_nofid;
1499    }
1500    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
1501
1502    fidp = get_fid(pdu, fid);
1503    if (fidp == NULL) {
1504        err = -ENOENT;
1505        goto out_nofid;
1506    }
1507    err = v9fs_co_fsync(pdu, fidp, datasync);
1508    if (!err) {
1509        err = offset;
1510    }
1511    put_fid(pdu, fidp);
1512out_nofid:
1513    complete_pdu(s, pdu, err);
1514}
1515
1516static void v9fs_clunk(void *opaque)
1517{
1518    int err;
1519    int32_t fid;
1520    size_t offset = 7;
1521    V9fsFidState *fidp;
1522    V9fsPDU *pdu = opaque;
1523    V9fsState *s = pdu->s;
1524
1525    err = pdu_unmarshal(pdu, offset, "d", &fid);
1526    if (err < 0) {
1527        goto out_nofid;
1528    }
1529    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
1530
1531    fidp = clunk_fid(s, fid);
1532    if (fidp == NULL) {
1533        err = -ENOENT;
1534        goto out_nofid;
1535    }
1536    /*
1537     * Bump the ref so that put_fid will
1538     * free the fid.
1539     */
1540    fidp->ref++;
1541    err = put_fid(pdu, fidp);
1542    if (!err) {
1543        err = offset;
1544    }
1545out_nofid:
1546    complete_pdu(s, pdu, err);
1547}
1548
1549static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1550                           uint64_t off, uint32_t max_count)
1551{
1552    ssize_t err;
1553    size_t offset = 7;
1554    int read_count;
1555    int64_t xattr_len;
1556
1557    xattr_len = fidp->fs.xattr.len;
1558    read_count = xattr_len - off;
1559    if (read_count > max_count) {
1560        read_count = max_count;
1561    } else if (read_count < 0) {
1562        /*
1563         * read beyond XATTR value
1564         */
1565        read_count = 0;
1566    }
1567    err = pdu_marshal(pdu, offset, "d", read_count);
1568    if (err < 0) {
1569        return err;
1570    }
1571    offset += err;
1572    err = v9fs_pack(pdu->elem.in_sg, pdu->elem.in_num, offset,
1573                    ((char *)fidp->fs.xattr.value) + off,
1574                    read_count);
1575    if (err < 0) {
1576        return err;
1577    }
1578    offset += err;
1579    return offset;
1580}
1581
1582static int v9fs_do_readdir_with_stat(V9fsPDU *pdu,
1583                                     V9fsFidState *fidp, uint32_t max_count)
1584{
1585    V9fsPath path;
1586    V9fsStat v9stat;
1587    int len, err = 0;
1588    int32_t count = 0;
1589    struct stat stbuf;
1590    off_t saved_dir_pos;
1591    struct dirent *dent, *result;
1592
1593    /* save the directory position */
1594    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1595    if (saved_dir_pos < 0) {
1596        return saved_dir_pos;
1597    }
1598
1599    dent = g_malloc(sizeof(struct dirent));
1600
1601    while (1) {
1602        v9fs_path_init(&path);
1603        err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1604        if (err || !result) {
1605            break;
1606        }
1607        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
1608        if (err < 0) {
1609            goto out;
1610        }
1611        err = v9fs_co_lstat(pdu, &path, &stbuf);
1612        if (err < 0) {
1613            goto out;
1614        }
1615        err = stat_to_v9stat(pdu, &path, &stbuf, &v9stat);
1616        if (err < 0) {
1617            goto out;
1618        }
1619        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1620        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
1621        if ((len != (v9stat.size + 2)) || ((count + len) > max_count)) {
1622            /* Ran out of buffer. Set dir back to old position and return */
1623            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1624            v9fs_stat_free(&v9stat);
1625            v9fs_path_free(&path);
1626            g_free(dent);
1627            return count;
1628        }
1629        count += len;
1630        v9fs_stat_free(&v9stat);
1631        v9fs_path_free(&path);
1632        saved_dir_pos = dent->d_off;
1633    }
1634out:
1635    g_free(dent);
1636    v9fs_path_free(&path);
1637    if (err < 0) {
1638        return err;
1639    }
1640    return count;
1641}
1642
1643/*
1644 * Create a QEMUIOVector for a sub-region of PDU iovecs
1645 *
1646 * @qiov:       uninitialized QEMUIOVector
1647 * @skip:       number of bytes to skip from beginning of PDU
1648 * @size:       number of bytes to include
1649 * @is_write:   true - write, false - read
1650 *
1651 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
1652 * with qemu_iovec_destroy().
1653 */
1654static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
1655                                    size_t skip, size_t size,
1656                                    bool is_write)
1657{
1658    QEMUIOVector elem;
1659    struct iovec *iov;
1660    unsigned int niov;
1661
1662    if (is_write) {
1663        iov = pdu->elem.out_sg;
1664        niov = pdu->elem.out_num;
1665    } else {
1666        iov = pdu->elem.in_sg;
1667        niov = pdu->elem.in_num;
1668    }
1669
1670    qemu_iovec_init_external(&elem, iov, niov);
1671    qemu_iovec_init(qiov, niov);
1672    qemu_iovec_concat(qiov, &elem, skip, size);
1673}
1674
1675static void v9fs_read(void *opaque)
1676{
1677    int32_t fid;
1678    uint64_t off;
1679    ssize_t err = 0;
1680    int32_t count = 0;
1681    size_t offset = 7;
1682    uint32_t max_count;
1683    V9fsFidState *fidp;
1684    V9fsPDU *pdu = opaque;
1685    V9fsState *s = pdu->s;
1686
1687    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
1688    if (err < 0) {
1689        goto out_nofid;
1690    }
1691    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
1692
1693    fidp = get_fid(pdu, fid);
1694    if (fidp == NULL) {
1695        err = -EINVAL;
1696        goto out_nofid;
1697    }
1698    if (fidp->fid_type == P9_FID_DIR) {
1699
1700        if (off == 0) {
1701            v9fs_co_rewinddir(pdu, fidp);
1702        }
1703        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
1704        if (count < 0) {
1705            err = count;
1706            goto out;
1707        }
1708        err = pdu_marshal(pdu, offset, "d", count);
1709        if (err < 0) {
1710            goto out;
1711        }
1712        err += offset + count;
1713    } else if (fidp->fid_type == P9_FID_FILE) {
1714        QEMUIOVector qiov_full;
1715        QEMUIOVector qiov;
1716        int32_t len;
1717
1718        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
1719        qemu_iovec_init(&qiov, qiov_full.niov);
1720        do {
1721            qemu_iovec_reset(&qiov);
1722            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
1723            if (0) {
1724                print_sg(qiov.iov, qiov.niov);
1725            }
1726            /* Loop in case of EINTR */
1727            do {
1728                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
1729                if (len >= 0) {
1730                    off   += len;
1731                    count += len;
1732                }
1733            } while (len == -EINTR && !pdu->cancelled);
1734            if (len < 0) {
1735                /* IO error return the error */
1736                err = len;
1737                goto out;
1738            }
1739        } while (count < max_count && len > 0);
1740        err = pdu_marshal(pdu, offset, "d", count);
1741        if (err < 0) {
1742            goto out;
1743        }
1744        err += offset + count;
1745        qemu_iovec_destroy(&qiov);
1746        qemu_iovec_destroy(&qiov_full);
1747    } else if (fidp->fid_type == P9_FID_XATTR) {
1748        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
1749    } else {
1750        err = -EINVAL;
1751    }
1752    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
1753out:
1754    put_fid(pdu, fidp);
1755out_nofid:
1756    complete_pdu(s, pdu, err);
1757}
1758
1759static size_t v9fs_readdir_data_size(V9fsString *name)
1760{
1761    /*
1762     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
1763     * size of type (1) + size of name.size (2) + strlen(name.data)
1764     */
1765    return 24 + v9fs_string_size(name);
1766}
1767
1768static int v9fs_do_readdir(V9fsPDU *pdu,
1769                           V9fsFidState *fidp, int32_t max_count)
1770{
1771    size_t size;
1772    V9fsQID qid;
1773    V9fsString name;
1774    int len, err = 0;
1775    int32_t count = 0;
1776    off_t saved_dir_pos;
1777    struct dirent *dent, *result;
1778
1779    /* save the directory position */
1780    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1781    if (saved_dir_pos < 0) {
1782        return saved_dir_pos;
1783    }
1784
1785    dent = g_malloc(sizeof(struct dirent));
1786
1787    while (1) {
1788        err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1789        if (err || !result) {
1790            break;
1791        }
1792        v9fs_string_init(&name);
1793        v9fs_string_sprintf(&name, "%s", dent->d_name);
1794        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
1795            /* Ran out of buffer. Set dir back to old position and return */
1796            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1797            v9fs_string_free(&name);
1798            g_free(dent);
1799            return count;
1800        }
1801        /*
1802         * Fill up just the path field of qid because the client uses
1803         * only that. To fill the entire qid structure we will have
1804         * to stat each dirent found, which is expensive
1805         */
1806        size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
1807        memcpy(&qid.path, &dent->d_ino, size);
1808        /* Fill the other fields with dummy values */
1809        qid.type = 0;
1810        qid.version = 0;
1811
1812        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1813        len = pdu_marshal(pdu, 11 + count, "Qqbs",
1814                          &qid, dent->d_off,
1815                          dent->d_type, &name);
1816        if (len < 0) {
1817            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1818            v9fs_string_free(&name);
1819            g_free(dent);
1820            return len;
1821        }
1822        count += len;
1823        v9fs_string_free(&name);
1824        saved_dir_pos = dent->d_off;
1825    }
1826    g_free(dent);
1827    if (err < 0) {
1828        return err;
1829    }
1830    return count;
1831}
1832
1833static void v9fs_readdir(void *opaque)
1834{
1835    int32_t fid;
1836    V9fsFidState *fidp;
1837    ssize_t retval = 0;
1838    size_t offset = 7;
1839    uint64_t initial_offset;
1840    int32_t count;
1841    uint32_t max_count;
1842    V9fsPDU *pdu = opaque;
1843    V9fsState *s = pdu->s;
1844
1845    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
1846                           &initial_offset, &max_count);
1847    if (retval < 0) {
1848        goto out_nofid;
1849    }
1850    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
1851
1852    fidp = get_fid(pdu, fid);
1853    if (fidp == NULL) {
1854        retval = -EINVAL;
1855        goto out_nofid;
1856    }
1857    if (!fidp->fs.dir) {
1858        retval = -EINVAL;
1859        goto out;
1860    }
1861    if (initial_offset == 0) {
1862        v9fs_co_rewinddir(pdu, fidp);
1863    } else {
1864        v9fs_co_seekdir(pdu, fidp, initial_offset);
1865    }
1866    count = v9fs_do_readdir(pdu, fidp, max_count);
1867    if (count < 0) {
1868        retval = count;
1869        goto out;
1870    }
1871    retval = pdu_marshal(pdu, offset, "d", count);
1872    if (retval < 0) {
1873        goto out;
1874    }
1875    retval += count + offset;
1876    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
1877out:
1878    put_fid(pdu, fidp);
1879out_nofid:
1880    complete_pdu(s, pdu, retval);
1881}
1882
1883static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1884                            uint64_t off, uint32_t count,
1885                            struct iovec *sg, int cnt)
1886{
1887    int i, to_copy;
1888    ssize_t err = 0;
1889    int write_count;
1890    int64_t xattr_len;
1891    size_t offset = 7;
1892
1893
1894    xattr_len = fidp->fs.xattr.len;
1895    write_count = xattr_len - off;
1896    if (write_count > count) {
1897        write_count = count;
1898    } else if (write_count < 0) {
1899        /*
1900         * write beyond XATTR value len specified in
1901         * xattrcreate
1902         */
1903        err = -ENOSPC;
1904        goto out;
1905    }
1906    err = pdu_marshal(pdu, offset, "d", write_count);
1907    if (err < 0) {
1908        return err;
1909    }
1910    err += offset;
1911    fidp->fs.xattr.copied_len += write_count;
1912    /*
1913     * Now copy the content from sg list
1914     */
1915    for (i = 0; i < cnt; i++) {
1916        if (write_count > sg[i].iov_len) {
1917            to_copy = sg[i].iov_len;
1918        } else {
1919            to_copy = write_count;
1920        }
1921        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
1922        /* updating vs->off since we are not using below */
1923        off += to_copy;
1924        write_count -= to_copy;
1925    }
1926out:
1927    return err;
1928}
1929
1930static void v9fs_write(void *opaque)
1931{
1932    ssize_t err;
1933    int32_t fid;
1934    uint64_t off;
1935    uint32_t count;
1936    int32_t len = 0;
1937    int32_t total = 0;
1938    size_t offset = 7;
1939    V9fsFidState *fidp;
1940    V9fsPDU *pdu = opaque;
1941    V9fsState *s = pdu->s;
1942    QEMUIOVector qiov_full;
1943    QEMUIOVector qiov;
1944
1945    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
1946    if (err < 0) {
1947        return complete_pdu(s, pdu, err);
1948    }
1949    offset += err;
1950    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
1951    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
1952
1953    fidp = get_fid(pdu, fid);
1954    if (fidp == NULL) {
1955        err = -EINVAL;
1956        goto out_nofid;
1957    }
1958    if (fidp->fid_type == P9_FID_FILE) {
1959        if (fidp->fs.fd == -1) {
1960            err = -EINVAL;
1961            goto out;
1962        }
1963    } else if (fidp->fid_type == P9_FID_XATTR) {
1964        /*
1965         * setxattr operation
1966         */
1967        err = v9fs_xattr_write(s, pdu, fidp, off, count,
1968                               qiov_full.iov, qiov_full.niov);
1969        goto out;
1970    } else {
1971        err = -EINVAL;
1972        goto out;
1973    }
1974    qemu_iovec_init(&qiov, qiov_full.niov);
1975    do {
1976        qemu_iovec_reset(&qiov);
1977        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
1978        if (0) {
1979            print_sg(qiov.iov, qiov.niov);
1980        }
1981        /* Loop in case of EINTR */
1982        do {
1983            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
1984            if (len >= 0) {
1985                off   += len;
1986                total += len;
1987            }
1988        } while (len == -EINTR && !pdu->cancelled);
1989        if (len < 0) {
1990            /* IO error return the error */
1991            err = len;
1992            goto out_qiov;
1993        }
1994    } while (total < count && len > 0);
1995
1996    offset = 7;
1997    err = pdu_marshal(pdu, offset, "d", total);
1998    if (err < 0) {
1999        goto out;
2000    }
2001    err += offset;
2002    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2003out_qiov:
2004    qemu_iovec_destroy(&qiov);
2005out:
2006    put_fid(pdu, fidp);
2007out_nofid:
2008    qemu_iovec_destroy(&qiov_full);
2009    complete_pdu(s, pdu, err);
2010}
2011
2012static void v9fs_create(void *opaque)
2013{
2014    int32_t fid;
2015    int err = 0;
2016    size_t offset = 7;
2017    V9fsFidState *fidp;
2018    V9fsQID qid;
2019    int32_t perm;
2020    int8_t mode;
2021    V9fsPath path;
2022    struct stat stbuf;
2023    V9fsString name;
2024    V9fsString extension;
2025    int iounit;
2026    V9fsPDU *pdu = opaque;
2027
2028    v9fs_path_init(&path);
2029    v9fs_string_init(&name);
2030    v9fs_string_init(&extension);
2031    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2032                        &perm, &mode, &extension);
2033    if (err < 0) {
2034        goto out_nofid;
2035    }
2036    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2037
2038    fidp = get_fid(pdu, fid);
2039    if (fidp == NULL) {
2040        err = -EINVAL;
2041        goto out_nofid;
2042    }
2043    if (perm & P9_STAT_MODE_DIR) {
2044        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2045                            fidp->uid, -1, &stbuf);
2046        if (err < 0) {
2047            goto out;
2048        }
2049        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2050        if (err < 0) {
2051            goto out;
2052        }
2053        v9fs_path_copy(&fidp->path, &path);
2054        err = v9fs_co_opendir(pdu, fidp);
2055        if (err < 0) {
2056            goto out;
2057        }
2058        fidp->fid_type = P9_FID_DIR;
2059    } else if (perm & P9_STAT_MODE_SYMLINK) {
2060        err = v9fs_co_symlink(pdu, fidp, &name,
2061                              extension.data, -1 , &stbuf);
2062        if (err < 0) {
2063            goto out;
2064        }
2065        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2066        if (err < 0) {
2067            goto out;
2068        }
2069        v9fs_path_copy(&fidp->path, &path);
2070    } else if (perm & P9_STAT_MODE_LINK) {
2071        int32_t ofid = atoi(extension.data);
2072        V9fsFidState *ofidp = get_fid(pdu, ofid);
2073        if (ofidp == NULL) {
2074            err = -EINVAL;
2075            goto out;
2076        }
2077        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2078        put_fid(pdu, ofidp);
2079        if (err < 0) {
2080            goto out;
2081        }
2082        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2083        if (err < 0) {
2084            fidp->fid_type = P9_FID_NONE;
2085            goto out;
2086        }
2087        v9fs_path_copy(&fidp->path, &path);
2088        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2089        if (err < 0) {
2090            fidp->fid_type = P9_FID_NONE;
2091            goto out;
2092        }
2093    } else if (perm & P9_STAT_MODE_DEVICE) {
2094        char ctype;
2095        uint32_t major, minor;
2096        mode_t nmode = 0;
2097
2098        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2099            err = -errno;
2100            goto out;
2101        }
2102
2103        switch (ctype) {
2104        case 'c':
2105            nmode = S_IFCHR;
2106            break;
2107        case 'b':
2108            nmode = S_IFBLK;
2109            break;
2110        default:
2111            err = -EIO;
2112            goto out;
2113        }
2114
2115        nmode |= perm & 0777;
2116        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2117                            makedev(major, minor), nmode, &stbuf);
2118        if (err < 0) {
2119            goto out;
2120        }
2121        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2122        if (err < 0) {
2123            goto out;
2124        }
2125        v9fs_path_copy(&fidp->path, &path);
2126    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2127        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2128                            0, S_IFIFO | (perm & 0777), &stbuf);
2129        if (err < 0) {
2130            goto out;
2131        }
2132        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2133        if (err < 0) {
2134            goto out;
2135        }
2136        v9fs_path_copy(&fidp->path, &path);
2137    } else if (perm & P9_STAT_MODE_SOCKET) {
2138        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2139                            0, S_IFSOCK | (perm & 0777), &stbuf);
2140        if (err < 0) {
2141            goto out;
2142        }
2143        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2144        if (err < 0) {
2145            goto out;
2146        }
2147        v9fs_path_copy(&fidp->path, &path);
2148    } else {
2149        err = v9fs_co_open2(pdu, fidp, &name, -1,
2150                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2151        if (err < 0) {
2152            goto out;
2153        }
2154        fidp->fid_type = P9_FID_FILE;
2155        fidp->open_flags = omode_to_uflags(mode);
2156        if (fidp->open_flags & O_EXCL) {
2157            /*
2158             * We let the host file system do O_EXCL check
2159             * We should not reclaim such fd
2160             */
2161            fidp->flags |= FID_NON_RECLAIMABLE;
2162        }
2163    }
2164    iounit = get_iounit(pdu, &fidp->path);
2165    stat_to_qid(&stbuf, &qid);
2166    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2167    if (err < 0) {
2168        goto out;
2169    }
2170    err += offset;
2171    trace_v9fs_create_return(pdu->tag, pdu->id,
2172                             qid.type, qid.version, qid.path, iounit);
2173out:
2174    put_fid(pdu, fidp);
2175out_nofid:
2176   complete_pdu(pdu->s, pdu, err);
2177   v9fs_string_free(&name);
2178   v9fs_string_free(&extension);
2179   v9fs_path_free(&path);
2180}
2181
2182static void v9fs_symlink(void *opaque)
2183{
2184    V9fsPDU *pdu = opaque;
2185    V9fsString name;
2186    V9fsString symname;
2187    V9fsFidState *dfidp;
2188    V9fsQID qid;
2189    struct stat stbuf;
2190    int32_t dfid;
2191    int err = 0;
2192    gid_t gid;
2193    size_t offset = 7;
2194
2195    v9fs_string_init(&name);
2196    v9fs_string_init(&symname);
2197    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2198    if (err < 0) {
2199        goto out_nofid;
2200    }
2201    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2202
2203    dfidp = get_fid(pdu, dfid);
2204    if (dfidp == NULL) {
2205        err = -EINVAL;
2206        goto out_nofid;
2207    }
2208    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2209    if (err < 0) {
2210        goto out;
2211    }
2212    stat_to_qid(&stbuf, &qid);
2213    err =  pdu_marshal(pdu, offset, "Q", &qid);
2214    if (err < 0) {
2215        goto out;
2216    }
2217    err += offset;
2218    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2219                              qid.type, qid.version, qid.path);
2220out:
2221    put_fid(pdu, dfidp);
2222out_nofid:
2223    complete_pdu(pdu->s, pdu, err);
2224    v9fs_string_free(&name);
2225    v9fs_string_free(&symname);
2226}
2227
2228static void v9fs_flush(void *opaque)
2229{
2230    ssize_t err;
2231    int16_t tag;
2232    size_t offset = 7;
2233    V9fsPDU *cancel_pdu;
2234    V9fsPDU *pdu = opaque;
2235    V9fsState *s = pdu->s;
2236
2237    err = pdu_unmarshal(pdu, offset, "w", &tag);
2238    if (err < 0) {
2239        complete_pdu(s, pdu, err);
2240        return;
2241    }
2242    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2243
2244    QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2245        if (cancel_pdu->tag == tag) {
2246            break;
2247        }
2248    }
2249    if (cancel_pdu) {
2250        cancel_pdu->cancelled = 1;
2251        /*
2252         * Wait for pdu to complete.
2253         */
2254        qemu_co_queue_wait(&cancel_pdu->complete);
2255        cancel_pdu->cancelled = 0;
2256        free_pdu(pdu->s, cancel_pdu);
2257    }
2258    complete_pdu(s, pdu, 7);
2259}
2260
2261static void v9fs_link(void *opaque)
2262{
2263    V9fsPDU *pdu = opaque;
2264    V9fsState *s = pdu->s;
2265    int32_t dfid, oldfid;
2266    V9fsFidState *dfidp, *oldfidp;
2267    V9fsString name;
2268    size_t offset = 7;
2269    int err = 0;
2270
2271    v9fs_string_init(&name);
2272    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2273    if (err < 0) {
2274        goto out_nofid;
2275    }
2276    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2277
2278    dfidp = get_fid(pdu, dfid);
2279    if (dfidp == NULL) {
2280        err = -ENOENT;
2281        goto out_nofid;
2282    }
2283
2284    oldfidp = get_fid(pdu, oldfid);
2285    if (oldfidp == NULL) {
2286        err = -ENOENT;
2287        goto out;
2288    }
2289    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2290    if (!err) {
2291        err = offset;
2292    }
2293out:
2294    put_fid(pdu, dfidp);
2295out_nofid:
2296    v9fs_string_free(&name);
2297    complete_pdu(s, pdu, err);
2298}
2299
2300/* Only works with path name based fid */
2301static void v9fs_remove(void *opaque)
2302{
2303    int32_t fid;
2304    int err = 0;
2305    size_t offset = 7;
2306    V9fsFidState *fidp;
2307    V9fsPDU *pdu = opaque;
2308
2309    err = pdu_unmarshal(pdu, offset, "d", &fid);
2310    if (err < 0) {
2311        goto out_nofid;
2312    }
2313    trace_v9fs_remove(pdu->tag, pdu->id, fid);
2314
2315    fidp = get_fid(pdu, fid);
2316    if (fidp == NULL) {
2317        err = -EINVAL;
2318        goto out_nofid;
2319    }
2320    /* if fs driver is not path based, return EOPNOTSUPP */
2321    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2322        err = -EOPNOTSUPP;
2323        goto out_err;
2324    }
2325    /*
2326     * IF the file is unlinked, we cannot reopen
2327     * the file later. So don't reclaim fd
2328     */
2329    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2330    if (err < 0) {
2331        goto out_err;
2332    }
2333    err = v9fs_co_remove(pdu, &fidp->path);
2334    if (!err) {
2335        err = offset;
2336    }
2337out_err:
2338    /* For TREMOVE we need to clunk the fid even on failed remove */
2339    clunk_fid(pdu->s, fidp->fid);
2340    put_fid(pdu, fidp);
2341out_nofid:
2342    complete_pdu(pdu->s, pdu, err);
2343}
2344
2345static void v9fs_unlinkat(void *opaque)
2346{
2347    int err = 0;
2348    V9fsString name;
2349    int32_t dfid, flags;
2350    size_t offset = 7;
2351    V9fsPath path;
2352    V9fsFidState *dfidp;
2353    V9fsPDU *pdu = opaque;
2354
2355    v9fs_string_init(&name);
2356    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
2357    if (err < 0) {
2358        goto out_nofid;
2359    }
2360    dfidp = get_fid(pdu, dfid);
2361    if (dfidp == NULL) {
2362        err = -EINVAL;
2363        goto out_nofid;
2364    }
2365    /*
2366     * IF the file is unlinked, we cannot reopen
2367     * the file later. So don't reclaim fd
2368     */
2369    v9fs_path_init(&path);
2370    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
2371    if (err < 0) {
2372        goto out_err;
2373    }
2374    err = v9fs_mark_fids_unreclaim(pdu, &path);
2375    if (err < 0) {
2376        goto out_err;
2377    }
2378    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, flags);
2379    if (!err) {
2380        err = offset;
2381    }
2382out_err:
2383    put_fid(pdu, dfidp);
2384    v9fs_path_free(&path);
2385out_nofid:
2386    complete_pdu(pdu->s, pdu, err);
2387    v9fs_string_free(&name);
2388}
2389
2390
2391/* Only works with path name based fid */
2392static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
2393                                int32_t newdirfid, V9fsString *name)
2394{
2395    char *end;
2396    int err = 0;
2397    V9fsPath new_path;
2398    V9fsFidState *tfidp;
2399    V9fsState *s = pdu->s;
2400    V9fsFidState *dirfidp = NULL;
2401    char *old_name, *new_name;
2402
2403    v9fs_path_init(&new_path);
2404    if (newdirfid != -1) {
2405        dirfidp = get_fid(pdu, newdirfid);
2406        if (dirfidp == NULL) {
2407            err = -ENOENT;
2408            goto out_nofid;
2409        }
2410        BUG_ON(dirfidp->fid_type != P9_FID_NONE);
2411        v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
2412    } else {
2413        old_name = fidp->path.data;
2414        end = strrchr(old_name, '/');
2415        if (end) {
2416            end++;
2417        } else {
2418            end = old_name;
2419        }
2420        new_name = g_malloc0(end - old_name + name->size + 1);
2421        strncat(new_name, old_name, end - old_name);
2422        strncat(new_name + (end - old_name), name->data, name->size);
2423        v9fs_co_name_to_path(pdu, NULL, new_name, &new_path);
2424        g_free(new_name);
2425    }
2426    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
2427    if (err < 0) {
2428        goto out;
2429    }
2430    /*
2431     * Fixup fid's pointing to the old name to
2432     * start pointing to the new name
2433     */
2434    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2435        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
2436            /* replace the name */
2437            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
2438        }
2439    }
2440out:
2441    if (dirfidp) {
2442        put_fid(pdu, dirfidp);
2443    }
2444    v9fs_path_free(&new_path);
2445out_nofid:
2446    return err;
2447}
2448
2449/* Only works with path name based fid */
2450static void v9fs_rename(void *opaque)
2451{
2452    int32_t fid;
2453    ssize_t err = 0;
2454    size_t offset = 7;
2455    V9fsString name;
2456    int32_t newdirfid;
2457    V9fsFidState *fidp;
2458    V9fsPDU *pdu = opaque;
2459    V9fsState *s = pdu->s;
2460
2461    v9fs_string_init(&name);
2462    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
2463    if (err < 0) {
2464        goto out_nofid;
2465    }
2466    fidp = get_fid(pdu, fid);
2467    if (fidp == NULL) {
2468        err = -ENOENT;
2469        goto out_nofid;
2470    }
2471    BUG_ON(fidp->fid_type != P9_FID_NONE);
2472    /* if fs driver is not path based, return EOPNOTSUPP */
2473    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2474        err = -EOPNOTSUPP;
2475        goto out;
2476    }
2477    v9fs_path_write_lock(s);
2478    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
2479    v9fs_path_unlock(s);
2480    if (!err) {
2481        err = offset;
2482    }
2483out:
2484    put_fid(pdu, fidp);
2485out_nofid:
2486    complete_pdu(s, pdu, err);
2487    v9fs_string_free(&name);
2488}
2489
2490static void v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
2491                               V9fsString *old_name, V9fsPath *newdir,
2492                               V9fsString *new_name)
2493{
2494    V9fsFidState *tfidp;
2495    V9fsPath oldpath, newpath;
2496    V9fsState *s = pdu->s;
2497
2498
2499    v9fs_path_init(&oldpath);
2500    v9fs_path_init(&newpath);
2501    v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
2502    v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
2503
2504    /*
2505     * Fixup fid's pointing to the old name to
2506     * start pointing to the new name
2507     */
2508    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2509        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
2510            /* replace the name */
2511            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
2512        }
2513    }
2514    v9fs_path_free(&oldpath);
2515    v9fs_path_free(&newpath);
2516}
2517
2518static int v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
2519                                  V9fsString *old_name, int32_t newdirfid,
2520                                  V9fsString *new_name)
2521{
2522    int err = 0;
2523    V9fsState *s = pdu->s;
2524    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
2525
2526    olddirfidp = get_fid(pdu, olddirfid);
2527    if (olddirfidp == NULL) {
2528        err = -ENOENT;
2529        goto out;
2530    }
2531    if (newdirfid != -1) {
2532        newdirfidp = get_fid(pdu, newdirfid);
2533        if (newdirfidp == NULL) {
2534            err = -ENOENT;
2535            goto out;
2536        }
2537    } else {
2538        newdirfidp = get_fid(pdu, olddirfid);
2539    }
2540
2541    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
2542                           &newdirfidp->path, new_name);
2543    if (err < 0) {
2544        goto out;
2545    }
2546    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
2547        /* Only for path based fid  we need to do the below fixup */
2548        v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
2549                           &newdirfidp->path, new_name);
2550    }
2551out:
2552    if (olddirfidp) {
2553        put_fid(pdu, olddirfidp);
2554    }
2555    if (newdirfidp) {
2556        put_fid(pdu, newdirfidp);
2557    }
2558    return err;
2559}
2560
2561static void v9fs_renameat(void *opaque)
2562{
2563    ssize_t err = 0;
2564    size_t offset = 7;
2565    V9fsPDU *pdu = opaque;
2566    V9fsState *s = pdu->s;
2567    int32_t olddirfid, newdirfid;
2568    V9fsString old_name, new_name;
2569
2570    v9fs_string_init(&old_name);
2571    v9fs_string_init(&new_name);
2572    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
2573                        &old_name, &newdirfid, &new_name);
2574    if (err < 0) {
2575        goto out_err;
2576    }
2577
2578    v9fs_path_write_lock(s);
2579    err = v9fs_complete_renameat(pdu, olddirfid,
2580                                 &old_name, newdirfid, &new_name);
2581    v9fs_path_unlock(s);
2582    if (!err) {
2583        err = offset;
2584    }
2585
2586out_err:
2587    complete_pdu(s, pdu, err);
2588    v9fs_string_free(&old_name);
2589    v9fs_string_free(&new_name);
2590}
2591
2592static void v9fs_wstat(void *opaque)
2593{
2594    int32_t fid;
2595    int err = 0;
2596    int16_t unused;
2597    V9fsStat v9stat;
2598    size_t offset = 7;
2599    struct stat stbuf;
2600    V9fsFidState *fidp;
2601    V9fsPDU *pdu = opaque;
2602    V9fsState *s = pdu->s;
2603
2604    v9fs_stat_init(&v9stat);
2605    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
2606    if (err < 0) {
2607        goto out_nofid;
2608    }
2609    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
2610                     v9stat.mode, v9stat.atime, v9stat.mtime);
2611
2612    fidp = get_fid(pdu, fid);
2613    if (fidp == NULL) {
2614        err = -EINVAL;
2615        goto out_nofid;
2616    }
2617    /* do we need to sync the file? */
2618    if (donttouch_stat(&v9stat)) {
2619        err = v9fs_co_fsync(pdu, fidp, 0);
2620        goto out;
2621    }
2622    if (v9stat.mode != -1) {
2623        uint32_t v9_mode;
2624        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2625        if (err < 0) {
2626            goto out;
2627        }
2628        v9_mode = stat_to_v9mode(&stbuf);
2629        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
2630            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
2631            /* Attempting to change the type */
2632            err = -EIO;
2633            goto out;
2634        }
2635        err = v9fs_co_chmod(pdu, &fidp->path,
2636                            v9mode_to_mode(v9stat.mode,
2637                                           &v9stat.extension));
2638        if (err < 0) {
2639            goto out;
2640        }
2641    }
2642    if (v9stat.mtime != -1 || v9stat.atime != -1) {
2643        struct timespec times[2];
2644        if (v9stat.atime != -1) {
2645            times[0].tv_sec = v9stat.atime;
2646            times[0].tv_nsec = 0;
2647        } else {
2648            times[0].tv_nsec = UTIME_OMIT;
2649        }
2650        if (v9stat.mtime != -1) {
2651            times[1].tv_sec = v9stat.mtime;
2652            times[1].tv_nsec = 0;
2653        } else {
2654            times[1].tv_nsec = UTIME_OMIT;
2655        }
2656        err = v9fs_co_utimensat(pdu, &fidp->path, times);
2657        if (err < 0) {
2658            goto out;
2659        }
2660    }
2661    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
2662        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
2663        if (err < 0) {
2664            goto out;
2665        }
2666    }
2667    if (v9stat.name.size != 0) {
2668        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
2669        if (err < 0) {
2670            goto out;
2671        }
2672    }
2673    if (v9stat.length != -1) {
2674        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
2675        if (err < 0) {
2676            goto out;
2677        }
2678    }
2679    err = offset;
2680out:
2681    put_fid(pdu, fidp);
2682out_nofid:
2683    v9fs_stat_free(&v9stat);
2684    complete_pdu(s, pdu, err);
2685}
2686
2687static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
2688{
2689    uint32_t f_type;
2690    uint32_t f_bsize;
2691    uint64_t f_blocks;
2692    uint64_t f_bfree;
2693    uint64_t f_bavail;
2694    uint64_t f_files;
2695    uint64_t f_ffree;
2696    uint64_t fsid_val;
2697    uint32_t f_namelen;
2698    size_t offset = 7;
2699    int32_t bsize_factor;
2700
2701    /*
2702     * compute bsize factor based on host file system block size
2703     * and client msize
2704     */
2705    bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
2706    if (!bsize_factor) {
2707        bsize_factor = 1;
2708    }
2709    f_type  = stbuf->f_type;
2710    f_bsize = stbuf->f_bsize;
2711    f_bsize *= bsize_factor;
2712    /*
2713     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
2714     * adjust(divide) the number of blocks, free blocks and available
2715     * blocks by bsize factor
2716     */
2717    f_blocks = stbuf->f_blocks/bsize_factor;
2718    f_bfree  = stbuf->f_bfree/bsize_factor;
2719    f_bavail = stbuf->f_bavail/bsize_factor;
2720    f_files  = stbuf->f_files;
2721    f_ffree  = stbuf->f_ffree;
2722    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
2723               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
2724    f_namelen = stbuf->f_namelen;
2725
2726    return pdu_marshal(pdu, offset, "ddqqqqqqd",
2727                       f_type, f_bsize, f_blocks, f_bfree,
2728                       f_bavail, f_files, f_ffree,
2729                       fsid_val, f_namelen);
2730}
2731
2732static void v9fs_statfs(void *opaque)
2733{
2734    int32_t fid;
2735    ssize_t retval = 0;
2736    size_t offset = 7;
2737    V9fsFidState *fidp;
2738    struct statfs stbuf;
2739    V9fsPDU *pdu = opaque;
2740    V9fsState *s = pdu->s;
2741
2742    retval = pdu_unmarshal(pdu, offset, "d", &fid);
2743    if (retval < 0) {
2744        goto out_nofid;
2745    }
2746    fidp = get_fid(pdu, fid);
2747    if (fidp == NULL) {
2748        retval = -ENOENT;
2749        goto out_nofid;
2750    }
2751    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
2752    if (retval < 0) {
2753        goto out;
2754    }
2755    retval = v9fs_fill_statfs(s, pdu, &stbuf);
2756    if (retval < 0) {
2757        goto out;
2758    }
2759    retval += offset;
2760out:
2761    put_fid(pdu, fidp);
2762out_nofid:
2763    complete_pdu(s, pdu, retval);
2764}
2765
2766static void v9fs_mknod(void *opaque)
2767{
2768
2769    int mode;
2770    gid_t gid;
2771    int32_t fid;
2772    V9fsQID qid;
2773    int err = 0;
2774    int major, minor;
2775    size_t offset = 7;
2776    V9fsString name;
2777    struct stat stbuf;
2778    V9fsFidState *fidp;
2779    V9fsPDU *pdu = opaque;
2780    V9fsState *s = pdu->s;
2781
2782    v9fs_string_init(&name);
2783    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
2784                        &major, &minor, &gid);
2785    if (err < 0) {
2786        goto out_nofid;
2787    }
2788    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
2789
2790    fidp = get_fid(pdu, fid);
2791    if (fidp == NULL) {
2792        err = -ENOENT;
2793        goto out_nofid;
2794    }
2795    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
2796                        makedev(major, minor), mode, &stbuf);
2797    if (err < 0) {
2798        goto out;
2799    }
2800    stat_to_qid(&stbuf, &qid);
2801    err = pdu_marshal(pdu, offset, "Q", &qid);
2802    if (err < 0) {
2803        goto out;
2804    }
2805    err += offset;
2806    trace_v9fs_mknod_return(pdu->tag, pdu->id,
2807                            qid.type, qid.version, qid.path);
2808out:
2809    put_fid(pdu, fidp);
2810out_nofid:
2811    complete_pdu(s, pdu, err);
2812    v9fs_string_free(&name);
2813}
2814
2815/*
2816 * Implement posix byte range locking code
2817 * Server side handling of locking code is very simple, because 9p server in
2818 * QEMU can handle only one client. And most of the lock handling
2819 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
2820 * do any thing in * qemu 9p server side lock code path.
2821 * So when a TLOCK request comes, always return success
2822 */
2823static void v9fs_lock(void *opaque)
2824{
2825    int8_t status;
2826    V9fsFlock flock;
2827    size_t offset = 7;
2828    struct stat stbuf;
2829    V9fsFidState *fidp;
2830    int32_t fid, err = 0;
2831    V9fsPDU *pdu = opaque;
2832    V9fsState *s = pdu->s;
2833
2834    status = P9_LOCK_ERROR;
2835    v9fs_string_init(&flock.client_id);
2836    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
2837                        &flock.flags, &flock.start, &flock.length,
2838                        &flock.proc_id, &flock.client_id);
2839    if (err < 0) {
2840        goto out_nofid;
2841    }
2842    trace_v9fs_lock(pdu->tag, pdu->id, fid,
2843                    flock.type, flock.start, flock.length);
2844
2845
2846    /* We support only block flag now (that too ignored currently) */
2847    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
2848        err = -EINVAL;
2849        goto out_nofid;
2850    }
2851    fidp = get_fid(pdu, fid);
2852    if (fidp == NULL) {
2853        err = -ENOENT;
2854        goto out_nofid;
2855    }
2856    err = v9fs_co_fstat(pdu, fidp, &stbuf);
2857    if (err < 0) {
2858        goto out;
2859    }
2860    status = P9_LOCK_SUCCESS;
2861out:
2862    put_fid(pdu, fidp);
2863out_nofid:
2864    err = pdu_marshal(pdu, offset, "b", status);
2865    if (err > 0) {
2866        err += offset;
2867    }
2868    trace_v9fs_lock_return(pdu->tag, pdu->id, status);
2869    complete_pdu(s, pdu, err);
2870    v9fs_string_free(&flock.client_id);
2871}
2872
2873/*
2874 * When a TGETLOCK request comes, always return success because all lock
2875 * handling is done by client's VFS layer.
2876 */
2877static void v9fs_getlock(void *opaque)
2878{
2879    size_t offset = 7;
2880    struct stat stbuf;
2881    V9fsFidState *fidp;
2882    V9fsGetlock glock;
2883    int32_t fid, err = 0;
2884    V9fsPDU *pdu = opaque;
2885    V9fsState *s = pdu->s;
2886
2887    v9fs_string_init(&glock.client_id);
2888    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
2889                        &glock.start, &glock.length, &glock.proc_id,
2890                        &glock.client_id);
2891    if (err < 0) {
2892        goto out_nofid;
2893    }
2894    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
2895                       glock.type, glock.start, glock.length);
2896
2897    fidp = get_fid(pdu, fid);
2898    if (fidp == NULL) {
2899        err = -ENOENT;
2900        goto out_nofid;
2901    }
2902    err = v9fs_co_fstat(pdu, fidp, &stbuf);
2903    if (err < 0) {
2904        goto out;
2905    }
2906    glock.type = P9_LOCK_TYPE_UNLCK;
2907    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
2908                          glock.start, glock.length, glock.proc_id,
2909                          &glock.client_id);
2910    if (err < 0) {
2911        goto out;
2912    }
2913    err += offset;
2914    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
2915                              glock.length, glock.proc_id);
2916out:
2917    put_fid(pdu, fidp);
2918out_nofid:
2919    complete_pdu(s, pdu, err);
2920    v9fs_string_free(&glock.client_id);
2921}
2922
2923static void v9fs_mkdir(void *opaque)
2924{
2925    V9fsPDU *pdu = opaque;
2926    size_t offset = 7;
2927    int32_t fid;
2928    struct stat stbuf;
2929    V9fsQID qid;
2930    V9fsString name;
2931    V9fsFidState *fidp;
2932    gid_t gid;
2933    int mode;
2934    int err = 0;
2935
2936    v9fs_string_init(&name);
2937    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
2938    if (err < 0) {
2939        goto out_nofid;
2940    }
2941    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
2942
2943    fidp = get_fid(pdu, fid);
2944    if (fidp == NULL) {
2945        err = -ENOENT;
2946        goto out_nofid;
2947    }
2948    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
2949    if (err < 0) {
2950        goto out;
2951    }
2952    stat_to_qid(&stbuf, &qid);
2953    err = pdu_marshal(pdu, offset, "Q", &qid);
2954    if (err < 0) {
2955        goto out;
2956    }
2957    err += offset;
2958    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
2959                            qid.type, qid.version, qid.path, err);
2960out:
2961    put_fid(pdu, fidp);
2962out_nofid:
2963    complete_pdu(pdu->s, pdu, err);
2964    v9fs_string_free(&name);
2965}
2966
2967static void v9fs_xattrwalk(void *opaque)
2968{
2969    int64_t size;
2970    V9fsString name;
2971    ssize_t err = 0;
2972    size_t offset = 7;
2973    int32_t fid, newfid;
2974    V9fsFidState *file_fidp;
2975    V9fsFidState *xattr_fidp = NULL;
2976    V9fsPDU *pdu = opaque;
2977    V9fsState *s = pdu->s;
2978
2979    v9fs_string_init(&name);
2980    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
2981    if (err < 0) {
2982        goto out_nofid;
2983    }
2984    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
2985
2986    file_fidp = get_fid(pdu, fid);
2987    if (file_fidp == NULL) {
2988        err = -ENOENT;
2989        goto out_nofid;
2990    }
2991    xattr_fidp = alloc_fid(s, newfid);
2992    if (xattr_fidp == NULL) {
2993        err = -EINVAL;
2994        goto out;
2995    }
2996    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
2997    if (name.data == NULL) {
2998        /*
2999         * listxattr request. Get the size first
3000         */
3001        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3002        if (size < 0) {
3003            err = size;
3004            clunk_fid(s, xattr_fidp->fid);
3005            goto out;
3006        }
3007        /*
3008         * Read the xattr value
3009         */
3010        xattr_fidp->fs.xattr.len = size;
3011        xattr_fidp->fid_type = P9_FID_XATTR;
3012        xattr_fidp->fs.xattr.copied_len = -1;
3013        if (size) {
3014            xattr_fidp->fs.xattr.value = g_malloc(size);
3015            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3016                                     xattr_fidp->fs.xattr.value,
3017                                     xattr_fidp->fs.xattr.len);
3018            if (err < 0) {
3019                clunk_fid(s, xattr_fidp->fid);
3020                goto out;
3021            }
3022        }
3023        err = pdu_marshal(pdu, offset, "q", size);
3024        if (err < 0) {
3025            goto out;
3026        }
3027        err += offset;
3028    } else {
3029        /*
3030         * specific xattr fid. We check for xattr
3031         * presence also collect the xattr size
3032         */
3033        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3034                                 &name, NULL, 0);
3035        if (size < 0) {
3036            err = size;
3037            clunk_fid(s, xattr_fidp->fid);
3038            goto out;
3039        }
3040        /*
3041         * Read the xattr value
3042         */
3043        xattr_fidp->fs.xattr.len = size;
3044        xattr_fidp->fid_type = P9_FID_XATTR;
3045        xattr_fidp->fs.xattr.copied_len = -1;
3046        if (size) {
3047            xattr_fidp->fs.xattr.value = g_malloc(size);
3048            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3049                                    &name, xattr_fidp->fs.xattr.value,
3050                                    xattr_fidp->fs.xattr.len);
3051            if (err < 0) {
3052                clunk_fid(s, xattr_fidp->fid);
3053                goto out;
3054            }
3055        }
3056        err = pdu_marshal(pdu, offset, "q", size);
3057        if (err < 0) {
3058            goto out;
3059        }
3060        err += offset;
3061    }
3062    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3063out:
3064    put_fid(pdu, file_fidp);
3065    if (xattr_fidp) {
3066        put_fid(pdu, xattr_fidp);
3067    }
3068out_nofid:
3069    complete_pdu(s, pdu, err);
3070    v9fs_string_free(&name);
3071}
3072
3073static void v9fs_xattrcreate(void *opaque)
3074{
3075    int flags;
3076    int32_t fid;
3077    int64_t size;
3078    ssize_t err = 0;
3079    V9fsString name;
3080    size_t offset = 7;
3081    V9fsFidState *file_fidp;
3082    V9fsFidState *xattr_fidp;
3083    V9fsPDU *pdu = opaque;
3084    V9fsState *s = pdu->s;
3085
3086    v9fs_string_init(&name);
3087    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3088    if (err < 0) {
3089        goto out_nofid;
3090    }
3091    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3092
3093    file_fidp = get_fid(pdu, fid);
3094    if (file_fidp == NULL) {
3095        err = -EINVAL;
3096        goto out_nofid;
3097    }
3098    /* Make the file fid point to xattr */
3099    xattr_fidp = file_fidp;
3100    xattr_fidp->fid_type = P9_FID_XATTR;
3101    xattr_fidp->fs.xattr.copied_len = 0;
3102    xattr_fidp->fs.xattr.len = size;
3103    xattr_fidp->fs.xattr.flags = flags;
3104    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3105    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3106    xattr_fidp->fs.xattr.value = g_malloc(size);
3107    err = offset;
3108    put_fid(pdu, file_fidp);
3109out_nofid:
3110    complete_pdu(s, pdu, err);
3111    v9fs_string_free(&name);
3112}
3113
3114static void v9fs_readlink(void *opaque)
3115{
3116    V9fsPDU *pdu = opaque;
3117    size_t offset = 7;
3118    V9fsString target;
3119    int32_t fid;
3120    int err = 0;
3121    V9fsFidState *fidp;
3122
3123    err = pdu_unmarshal(pdu, offset, "d", &fid);
3124    if (err < 0) {
3125        goto out_nofid;
3126    }
3127    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3128    fidp = get_fid(pdu, fid);
3129    if (fidp == NULL) {
3130        err = -ENOENT;
3131        goto out_nofid;
3132    }
3133
3134    v9fs_string_init(&target);
3135    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3136    if (err < 0) {
3137        goto out;
3138    }
3139    err = pdu_marshal(pdu, offset, "s", &target);
3140    if (err < 0) {
3141        v9fs_string_free(&target);
3142        goto out;
3143    }
3144    err += offset;
3145    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3146    v9fs_string_free(&target);
3147out:
3148    put_fid(pdu, fidp);
3149out_nofid:
3150    complete_pdu(pdu->s, pdu, err);
3151}
3152
3153static CoroutineEntry *pdu_co_handlers[] = {
3154    [P9_TREADDIR] = v9fs_readdir,
3155    [P9_TSTATFS] = v9fs_statfs,
3156    [P9_TGETATTR] = v9fs_getattr,
3157    [P9_TSETATTR] = v9fs_setattr,
3158    [P9_TXATTRWALK] = v9fs_xattrwalk,
3159    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3160    [P9_TMKNOD] = v9fs_mknod,
3161    [P9_TRENAME] = v9fs_rename,
3162    [P9_TLOCK] = v9fs_lock,
3163    [P9_TGETLOCK] = v9fs_getlock,
3164    [P9_TRENAMEAT] = v9fs_renameat,
3165    [P9_TREADLINK] = v9fs_readlink,
3166    [P9_TUNLINKAT] = v9fs_unlinkat,
3167    [P9_TMKDIR] = v9fs_mkdir,
3168    [P9_TVERSION] = v9fs_version,
3169    [P9_TLOPEN] = v9fs_open,
3170    [P9_TATTACH] = v9fs_attach,
3171    [P9_TSTAT] = v9fs_stat,
3172    [P9_TWALK] = v9fs_walk,
3173    [P9_TCLUNK] = v9fs_clunk,
3174    [P9_TFSYNC] = v9fs_fsync,
3175    [P9_TOPEN] = v9fs_open,
3176    [P9_TREAD] = v9fs_read,
3177#if 0
3178    [P9_TAUTH] = v9fs_auth,
3179#endif
3180    [P9_TFLUSH] = v9fs_flush,
3181    [P9_TLINK] = v9fs_link,
3182    [P9_TSYMLINK] = v9fs_symlink,
3183    [P9_TCREATE] = v9fs_create,
3184    [P9_TLCREATE] = v9fs_lcreate,
3185    [P9_TWRITE] = v9fs_write,
3186    [P9_TWSTAT] = v9fs_wstat,
3187    [P9_TREMOVE] = v9fs_remove,
3188};
3189
3190static void v9fs_op_not_supp(void *opaque)
3191{
3192    V9fsPDU *pdu = opaque;
3193    complete_pdu(pdu->s, pdu, -EOPNOTSUPP);
3194}
3195
3196static void v9fs_fs_ro(void *opaque)
3197{
3198    V9fsPDU *pdu = opaque;
3199    complete_pdu(pdu->s, pdu, -EROFS);
3200}
3201
3202static inline bool is_read_only_op(V9fsPDU *pdu)
3203{
3204    switch (pdu->id) {
3205    case P9_TREADDIR:
3206    case P9_TSTATFS:
3207    case P9_TGETATTR:
3208    case P9_TXATTRWALK:
3209    case P9_TLOCK:
3210    case P9_TGETLOCK:
3211    case P9_TREADLINK:
3212    case P9_TVERSION:
3213    case P9_TLOPEN:
3214    case P9_TATTACH:
3215    case P9_TSTAT:
3216    case P9_TWALK:
3217    case P9_TCLUNK:
3218    case P9_TFSYNC:
3219    case P9_TOPEN:
3220    case P9_TREAD:
3221    case P9_TAUTH:
3222    case P9_TFLUSH:
3223        return 1;
3224    default:
3225        return 0;
3226    }
3227}
3228
3229static void submit_pdu(V9fsState *s, V9fsPDU *pdu)
3230{
3231    Coroutine *co;
3232    CoroutineEntry *handler;
3233
3234    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
3235        (pdu_co_handlers[pdu->id] == NULL)) {
3236        handler = v9fs_op_not_supp;
3237    } else {
3238        handler = pdu_co_handlers[pdu->id];
3239    }
3240
3241    if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
3242        handler = v9fs_fs_ro;
3243    }
3244    co = qemu_coroutine_create(handler);
3245    qemu_coroutine_enter(co, pdu);
3246}
3247
3248void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
3249{
3250    V9fsState *s = (V9fsState *)vdev;
3251    V9fsPDU *pdu;
3252    ssize_t len;
3253
3254    while ((pdu = alloc_pdu(s)) &&
3255            (len = virtqueue_pop(vq, &pdu->elem)) != 0) {
3256        uint8_t *ptr;
3257        pdu->s = s;
3258        BUG_ON(pdu->elem.out_num == 0 || pdu->elem.in_num == 0);
3259        BUG_ON(pdu->elem.out_sg[0].iov_len < 7);
3260
3261        ptr = pdu->elem.out_sg[0].iov_base;
3262
3263        pdu->size = le32_to_cpu(*(uint32_t *)ptr);
3264        pdu->id = ptr[4];
3265        pdu->tag = le16_to_cpu(*(uint16_t *)(ptr + 5));
3266        qemu_co_queue_init(&pdu->complete);
3267        submit_pdu(s, pdu);
3268    }
3269    free_pdu(s, pdu);
3270}
3271
3272void virtio_9p_set_fd_limit(void)
3273{
3274    struct rlimit rlim;
3275    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
3276        fprintf(stderr, "Failed to get the resource limit\n");
3277        exit(1);
3278    }
3279    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
3280    open_fd_rc = rlim.rlim_cur/2;
3281}
3282