qemu/hw/9pfs/virtio-9p.c
<<
>>
Prefs
   1/*
   2 * Virtio 9p backend
   3 *
   4 * Copyright IBM, Corp. 2010
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "hw/virtio/virtio.h"
  15#include "hw/i386/pc.h"
  16#include "qemu/sockets.h"
  17#include "virtio-9p.h"
  18#include "fsdev/qemu-fsdev.h"
  19#include "virtio-9p-xattr.h"
  20#include "virtio-9p-coth.h"
  21#include "trace.h"
  22#include "migration/migration.h"
  23
/*
 * File-descriptor accounting. The first two are non-static and so visible to
 * other translation units; presumably a high-water mark and a running count
 * of open descriptors -- TODO confirm against the code that updates them.
 */
int open_fd_hw;
int total_open_fd;
/* Batch limit: v9fs_reclaim_fd() stops scanning once it has queued this many fids. */
static int open_fd_rc;
  27
  28enum {
  29    Oread   = 0x00,
  30    Owrite  = 0x01,
  31    Ordwr   = 0x02,
  32    Oexec   = 0x03,
  33    Oexcl   = 0x04,
  34    Otrunc  = 0x10,
  35    Orexec  = 0x20,
  36    Orclose = 0x40,
  37    Oappend = 0x80,
  38};
  39
  40static int omode_to_uflags(int8_t mode)
  41{
  42    int ret = 0;
  43
  44    switch (mode & 3) {
  45    case Oread:
  46        ret = O_RDONLY;
  47        break;
  48    case Ordwr:
  49        ret = O_RDWR;
  50        break;
  51    case Owrite:
  52        ret = O_WRONLY;
  53        break;
  54    case Oexec:
  55        ret = O_RDONLY;
  56        break;
  57    }
  58
  59    if (mode & Otrunc) {
  60        ret |= O_TRUNC;
  61    }
  62
  63    if (mode & Oappend) {
  64        ret |= O_APPEND;
  65    }
  66
  67    if (mode & Oexcl) {
  68        ret |= O_EXCL;
  69    }
  70
  71    return ret;
  72}
  73
  74struct dotl_openflag_map {
  75    int dotl_flag;
  76    int open_flag;
  77};
  78
  79static int dotl_to_open_flags(int flags)
  80{
  81    int i;
  82    /*
  83     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
  84     * and P9_DOTL_NOACCESS
  85     */
  86    int oflags = flags & O_ACCMODE;
  87
  88    struct dotl_openflag_map dotl_oflag_map[] = {
  89        { P9_DOTL_CREATE, O_CREAT },
  90        { P9_DOTL_EXCL, O_EXCL },
  91        { P9_DOTL_NOCTTY , O_NOCTTY },
  92        { P9_DOTL_TRUNC, O_TRUNC },
  93        { P9_DOTL_APPEND, O_APPEND },
  94        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
  95        { P9_DOTL_DSYNC, O_DSYNC },
  96        { P9_DOTL_FASYNC, FASYNC },
  97        { P9_DOTL_DIRECT, O_DIRECT },
  98        { P9_DOTL_LARGEFILE, O_LARGEFILE },
  99        { P9_DOTL_DIRECTORY, O_DIRECTORY },
 100        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
 101        { P9_DOTL_NOATIME, O_NOATIME },
 102        { P9_DOTL_SYNC, O_SYNC },
 103    };
 104
 105    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
 106        if (flags & dotl_oflag_map[i].dotl_flag) {
 107            oflags |= dotl_oflag_map[i].open_flag;
 108        }
 109    }
 110
 111    return oflags;
 112}
 113
 114void cred_init(FsCred *credp)
 115{
 116    credp->fc_uid = -1;
 117    credp->fc_gid = -1;
 118    credp->fc_mode = -1;
 119    credp->fc_rdev = -1;
 120}
 121
 122static int get_dotl_openflags(V9fsState *s, int oflags)
 123{
 124    int flags;
 125    /*
 126     * Filter the client open flags
 127     */
 128    flags = dotl_to_open_flags(oflags);
 129    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
 130    /*
 131     * Ignore direct disk access hint until the server supports it.
 132     */
 133    flags &= ~O_DIRECT;
 134    return flags;
 135}
 136
 137void v9fs_path_init(V9fsPath *path)
 138{
 139    path->data = NULL;
 140    path->size = 0;
 141}
 142
 143void v9fs_path_free(V9fsPath *path)
 144{
 145    g_free(path->data);
 146    path->data = NULL;
 147    path->size = 0;
 148}
 149
 150void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs)
 151{
 152    v9fs_path_free(lhs);
 153    lhs->data = g_malloc(rhs->size);
 154    memcpy(lhs->data, rhs->data, rhs->size);
 155    lhs->size = rhs->size;
 156}
 157
 158int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
 159                      const char *name, V9fsPath *path)
 160{
 161    int err;
 162    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
 163    if (err < 0) {
 164        err = -errno;
 165    }
 166    return err;
 167}
 168
 169/*
 170 * Return TRUE if s1 is an ancestor of s2.
 171 *
 172 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 173 * As a special case, We treat s1 as ancestor of s2 if they are same!
 174 */
 175static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
 176{
 177    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
 178        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
 179            return 1;
 180        }
 181    }
 182    return 0;
 183}
 184
 185static size_t v9fs_string_size(V9fsString *str)
 186{
 187    return str->size;
 188}
 189
 190/*
 191 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
 192static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
 193{
 194    int err = 1;
 195    if (f->fid_type == P9_FID_FILE) {
 196        if (f->fs.fd == -1) {
 197            do {
 198                err = v9fs_co_open(pdu, f, f->open_flags);
 199            } while (err == -EINTR && !pdu->cancelled);
 200        }
 201    } else if (f->fid_type == P9_FID_DIR) {
 202        if (f->fs.dir == NULL) {
 203            do {
 204                err = v9fs_co_opendir(pdu, f);
 205            } while (err == -EINTR && !pdu->cancelled);
 206        }
 207    }
 208    return err;
 209}
 210
 211static V9fsFidState *get_fid(V9fsPDU *pdu, int32_t fid)
 212{
 213    int err;
 214    V9fsFidState *f;
 215    V9fsState *s = pdu->s;
 216
 217    for (f = s->fid_list; f; f = f->next) {
 218        BUG_ON(f->clunked);
 219        if (f->fid == fid) {
 220            /*
 221             * Update the fid ref upfront so that
 222             * we don't get reclaimed when we yield
 223             * in open later.
 224             */
 225            f->ref++;
 226            /*
 227             * check whether we need to reopen the
 228             * file. We might have closed the fd
 229             * while trying to free up some file
 230             * descriptors.
 231             */
 232            err = v9fs_reopen_fid(pdu, f);
 233            if (err < 0) {
 234                f->ref--;
 235                return NULL;
 236            }
 237            /*
 238             * Mark the fid as referenced so that the LRU
 239             * reclaim won't close the file descriptor
 240             */
 241            f->flags |= FID_REFERENCED;
 242            return f;
 243        }
 244    }
 245    return NULL;
 246}
 247
 248static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
 249{
 250    V9fsFidState *f;
 251
 252    for (f = s->fid_list; f; f = f->next) {
 253        /* If fid is already there return NULL */
 254        BUG_ON(f->clunked);
 255        if (f->fid == fid) {
 256            return NULL;
 257        }
 258    }
 259    f = g_malloc0(sizeof(V9fsFidState));
 260    f->fid = fid;
 261    f->fid_type = P9_FID_NONE;
 262    f->ref = 1;
 263    /*
 264     * Mark the fid as referenced so that the LRU
 265     * reclaim won't close the file descriptor
 266     */
 267    f->flags |= FID_REFERENCED;
 268    f->next = s->fid_list;
 269    s->fid_list = f;
 270
 271    return f;
 272}
 273
 274static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
 275{
 276    int retval = 0;
 277
 278    if (fidp->fs.xattr.copied_len == -1) {
 279        /* getxattr/listxattr fid */
 280        goto free_value;
 281    }
 282    /*
 283     * if this is fid for setxattr. clunk should
 284     * result in setxattr localcall
 285     */
 286    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
 287        /* clunk after partial write */
 288        retval = -EINVAL;
 289        goto free_out;
 290    }
 291    if (fidp->fs.xattr.len) {
 292        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
 293                                   fidp->fs.xattr.value,
 294                                   fidp->fs.xattr.len,
 295                                   fidp->fs.xattr.flags);
 296    } else {
 297        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
 298    }
 299free_out:
 300    v9fs_string_free(&fidp->fs.xattr.name);
 301free_value:
 302    if (fidp->fs.xattr.value) {
 303        g_free(fidp->fs.xattr.value);
 304    }
 305    return retval;
 306}
 307
 308static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 309{
 310    int retval = 0;
 311
 312    if (fidp->fid_type == P9_FID_FILE) {
 313        /* If we reclaimed the fd no need to close */
 314        if (fidp->fs.fd != -1) {
 315            retval = v9fs_co_close(pdu, &fidp->fs);
 316        }
 317    } else if (fidp->fid_type == P9_FID_DIR) {
 318        if (fidp->fs.dir != NULL) {
 319            retval = v9fs_co_closedir(pdu, &fidp->fs);
 320        }
 321    } else if (fidp->fid_type == P9_FID_XATTR) {
 322        retval = v9fs_xattr_fid_clunk(pdu, fidp);
 323    }
 324    v9fs_path_free(&fidp->path);
 325    g_free(fidp);
 326    return retval;
 327}
 328
 329static int put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
 330{
 331    BUG_ON(!fidp->ref);
 332    fidp->ref--;
 333    /*
 334     * Don't free the fid if it is in reclaim list
 335     */
 336    if (!fidp->ref && fidp->clunked) {
 337        if (fidp->fid == pdu->s->root_fid) {
 338            /*
 339             * if the clunked fid is root fid then we
 340             * have unmounted the fs on the client side.
 341             * delete the migration blocker. Ideally, this
 342             * should be hooked to transport close notification
 343             */
 344            if (pdu->s->migration_blocker) {
 345                migrate_del_blocker(pdu->s->migration_blocker);
 346                error_free(pdu->s->migration_blocker);
 347                pdu->s->migration_blocker = NULL;
 348            }
 349        }
 350        return free_fid(pdu, fidp);
 351    }
 352    return 0;
 353}
 354
 355static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
 356{
 357    V9fsFidState **fidpp, *fidp;
 358
 359    for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
 360        if ((*fidpp)->fid == fid) {
 361            break;
 362        }
 363    }
 364    if (*fidpp == NULL) {
 365        return NULL;
 366    }
 367    fidp = *fidpp;
 368    *fidpp = fidp->next;
 369    fidp->clunked = 1;
 370    return fidp;
 371}
 372
 373void v9fs_reclaim_fd(V9fsPDU *pdu)
 374{
 375    int reclaim_count = 0;
 376    V9fsState *s = pdu->s;
 377    V9fsFidState *f, *reclaim_list = NULL;
 378
 379    for (f = s->fid_list; f; f = f->next) {
 380        /*
 381         * Unlink fids cannot be reclaimed. Check
 382         * for them and skip them. Also skip fids
 383         * currently being operated on.
 384         */
 385        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
 386            continue;
 387        }
 388        /*
 389         * if it is a recently referenced fid
 390         * we leave the fid untouched and clear the
 391         * reference bit. We come back to it later
 392         * in the next iteration. (a simple LRU without
 393         * moving list elements around)
 394         */
 395        if (f->flags & FID_REFERENCED) {
 396            f->flags &= ~FID_REFERENCED;
 397            continue;
 398        }
 399        /*
 400         * Add fids to reclaim list.
 401         */
 402        if (f->fid_type == P9_FID_FILE) {
 403            if (f->fs.fd != -1) {
 404                /*
 405                 * Up the reference count so that
 406                 * a clunk request won't free this fid
 407                 */
 408                f->ref++;
 409                f->rclm_lst = reclaim_list;
 410                reclaim_list = f;
 411                f->fs_reclaim.fd = f->fs.fd;
 412                f->fs.fd = -1;
 413                reclaim_count++;
 414            }
 415        } else if (f->fid_type == P9_FID_DIR) {
 416            if (f->fs.dir != NULL) {
 417                /*
 418                 * Up the reference count so that
 419                 * a clunk request won't free this fid
 420                 */
 421                f->ref++;
 422                f->rclm_lst = reclaim_list;
 423                reclaim_list = f;
 424                f->fs_reclaim.dir = f->fs.dir;
 425                f->fs.dir = NULL;
 426                reclaim_count++;
 427            }
 428        }
 429        if (reclaim_count >= open_fd_rc) {
 430            break;
 431        }
 432    }
 433    /*
 434     * Now close the fid in reclaim list. Free them if they
 435     * are already clunked.
 436     */
 437    while (reclaim_list) {
 438        f = reclaim_list;
 439        reclaim_list = f->rclm_lst;
 440        if (f->fid_type == P9_FID_FILE) {
 441            v9fs_co_close(pdu, &f->fs_reclaim);
 442        } else if (f->fid_type == P9_FID_DIR) {
 443            v9fs_co_closedir(pdu, &f->fs_reclaim);
 444        }
 445        f->rclm_lst = NULL;
 446        /*
 447         * Now drop the fid reference, free it
 448         * if clunked.
 449         */
 450        put_fid(pdu, f);
 451    }
 452}
 453
 454static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
 455{
 456    int err;
 457    V9fsState *s = pdu->s;
 458    V9fsFidState *fidp, head_fid;
 459
 460    head_fid.next = s->fid_list;
 461    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
 462        if (fidp->path.size != path->size) {
 463            continue;
 464        }
 465        if (!memcmp(fidp->path.data, path->data, path->size)) {
 466            /* Mark the fid non reclaimable. */
 467            fidp->flags |= FID_NON_RECLAIMABLE;
 468
 469            /* reopen the file/dir if already closed */
 470            err = v9fs_reopen_fid(pdu, fidp);
 471            if (err < 0) {
 472                return -1;
 473            }
 474            /*
 475             * Go back to head of fid list because
 476             * the list could have got updated when
 477             * switched to the worker thread
 478             */
 479            if (err == 0) {
 480                fidp = &head_fid;
 481            }
 482        }
 483    }
 484    return 0;
 485}
 486
 487static void virtfs_reset(V9fsPDU *pdu)
 488{
 489    V9fsState *s = pdu->s;
 490    V9fsFidState *fidp = NULL;
 491
 492    /* Free all fids */
 493    while (s->fid_list) {
 494        fidp = s->fid_list;
 495        s->fid_list = fidp->next;
 496
 497        if (fidp->ref) {
 498            fidp->clunked = 1;
 499        } else {
 500            free_fid(pdu, fidp);
 501        }
 502    }
 503    if (fidp) {
 504        /* One or more unclunked fids found... */
 505        error_report("9pfs:%s: One or more uncluncked fids "
 506                     "found during reset", __func__);
 507    }
 508}
 509
/* Bits stored in the qid type field by stat_to_qid(). */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/*
 * Bits of the 9P stat mode word, above the low nine permission bits.
 * v9mode_to_mode() and stat_to_v9mode() convert between these and the
 * host's mode_t.
 */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Union of the mode bits that describe the kind of file. */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
 534
 535/* This is the algorithm from ufs in spfs */
 536static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
 537{
 538    size_t size;
 539
 540    memset(&qidp->path, 0, sizeof(qidp->path));
 541    size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
 542    memcpy(&qidp->path, &stbuf->st_ino, size);
 543    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
 544    qidp->type = 0;
 545    if (S_ISDIR(stbuf->st_mode)) {
 546        qidp->type |= P9_QID_TYPE_DIR;
 547    }
 548    if (S_ISLNK(stbuf->st_mode)) {
 549        qidp->type |= P9_QID_TYPE_SYMLINK;
 550    }
 551}
 552
 553static int fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, V9fsQID *qidp)
 554{
 555    struct stat stbuf;
 556    int err;
 557
 558    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
 559    if (err < 0) {
 560        return err;
 561    }
 562    stat_to_qid(&stbuf, qidp);
 563    return 0;
 564}
 565
 566static V9fsPDU *alloc_pdu(V9fsState *s)
 567{
 568    V9fsPDU *pdu = NULL;
 569
 570    if (!QLIST_EMPTY(&s->free_list)) {
 571        pdu = QLIST_FIRST(&s->free_list);
 572        QLIST_REMOVE(pdu, next);
 573        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
 574    }
 575    return pdu;
 576}
 577
 578static void free_pdu(V9fsState *s, V9fsPDU *pdu)
 579{
 580    if (pdu) {
 581        /*
 582         * Cancelled pdu are added back to the freelist
 583         * by flush request .
 584         */
 585        if (!pdu->cancelled) {
 586            QLIST_REMOVE(pdu, next);
 587            QLIST_INSERT_HEAD(&s->free_list, pdu, next);
 588        }
 589    }
 590}
 591
 592/*
 593 * We don't do error checking for pdu_marshal/unmarshal here
 594 * because we always expect to have enough space to encode
 595 * error details
 596 */
 597static void complete_pdu(V9fsState *s, V9fsPDU *pdu, ssize_t len)
 598{
 599    int8_t id = pdu->id + 1; /* Response */
 600
 601    if (len < 0) {
 602        int err = -len;
 603        len = 7;
 604
 605        if (s->proto_version != V9FS_PROTO_2000L) {
 606            V9fsString str;
 607
 608            str.data = strerror(err);
 609            str.size = strlen(str.data);
 610
 611            len += pdu_marshal(pdu, len, "s", &str);
 612            id = P9_RERROR;
 613        }
 614
 615        len += pdu_marshal(pdu, len, "d", err);
 616
 617        if (s->proto_version == V9FS_PROTO_2000L) {
 618            id = P9_RLERROR;
 619        }
 620        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
 621    }
 622
 623    /* fill out the header */
 624    pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag);
 625
 626    /* keep these in sync */
 627    pdu->size = len;
 628    pdu->id = id;
 629
 630    /* push onto queue and notify */
 631    virtqueue_push(s->vq, &pdu->elem, len);
 632
 633    /* FIXME: we should batch these completions */
 634    virtio_notify(VIRTIO_DEVICE(s), s->vq);
 635
 636    /* Now wakeup anybody waiting in flush for this request */
 637    qemu_co_queue_next(&pdu->complete);
 638
 639    free_pdu(s, pdu);
 640}
 641
 642static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
 643{
 644    mode_t ret;
 645
 646    ret = mode & 0777;
 647    if (mode & P9_STAT_MODE_DIR) {
 648        ret |= S_IFDIR;
 649    }
 650
 651    if (mode & P9_STAT_MODE_SYMLINK) {
 652        ret |= S_IFLNK;
 653    }
 654    if (mode & P9_STAT_MODE_SOCKET) {
 655        ret |= S_IFSOCK;
 656    }
 657    if (mode & P9_STAT_MODE_NAMED_PIPE) {
 658        ret |= S_IFIFO;
 659    }
 660    if (mode & P9_STAT_MODE_DEVICE) {
 661        if (extension->size && extension->data[0] == 'c') {
 662            ret |= S_IFCHR;
 663        } else {
 664            ret |= S_IFBLK;
 665        }
 666    }
 667
 668    if (!(ret&~0777)) {
 669        ret |= S_IFREG;
 670    }
 671
 672    if (mode & P9_STAT_MODE_SETUID) {
 673        ret |= S_ISUID;
 674    }
 675    if (mode & P9_STAT_MODE_SETGID) {
 676        ret |= S_ISGID;
 677    }
 678    if (mode & P9_STAT_MODE_SETVTX) {
 679        ret |= S_ISVTX;
 680    }
 681
 682    return ret;
 683}
 684
 685static int donttouch_stat(V9fsStat *stat)
 686{
 687    if (stat->type == -1 &&
 688        stat->dev == -1 &&
 689        stat->qid.type == -1 &&
 690        stat->qid.version == -1 &&
 691        stat->qid.path == -1 &&
 692        stat->mode == -1 &&
 693        stat->atime == -1 &&
 694        stat->mtime == -1 &&
 695        stat->length == -1 &&
 696        !stat->name.size &&
 697        !stat->uid.size &&
 698        !stat->gid.size &&
 699        !stat->muid.size &&
 700        stat->n_uid == -1 &&
 701        stat->n_gid == -1 &&
 702        stat->n_muid == -1) {
 703        return 1;
 704    }
 705
 706    return 0;
 707}
 708
 709static void v9fs_stat_init(V9fsStat *stat)
 710{
 711    v9fs_string_init(&stat->name);
 712    v9fs_string_init(&stat->uid);
 713    v9fs_string_init(&stat->gid);
 714    v9fs_string_init(&stat->muid);
 715    v9fs_string_init(&stat->extension);
 716}
 717
 718static void v9fs_stat_free(V9fsStat *stat)
 719{
 720    v9fs_string_free(&stat->name);
 721    v9fs_string_free(&stat->uid);
 722    v9fs_string_free(&stat->gid);
 723    v9fs_string_free(&stat->muid);
 724    v9fs_string_free(&stat->extension);
 725}
 726
 727static uint32_t stat_to_v9mode(const struct stat *stbuf)
 728{
 729    uint32_t mode;
 730
 731    mode = stbuf->st_mode & 0777;
 732    if (S_ISDIR(stbuf->st_mode)) {
 733        mode |= P9_STAT_MODE_DIR;
 734    }
 735
 736    if (S_ISLNK(stbuf->st_mode)) {
 737        mode |= P9_STAT_MODE_SYMLINK;
 738    }
 739
 740    if (S_ISSOCK(stbuf->st_mode)) {
 741        mode |= P9_STAT_MODE_SOCKET;
 742    }
 743
 744    if (S_ISFIFO(stbuf->st_mode)) {
 745        mode |= P9_STAT_MODE_NAMED_PIPE;
 746    }
 747
 748    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
 749        mode |= P9_STAT_MODE_DEVICE;
 750    }
 751
 752    if (stbuf->st_mode & S_ISUID) {
 753        mode |= P9_STAT_MODE_SETUID;
 754    }
 755
 756    if (stbuf->st_mode & S_ISGID) {
 757        mode |= P9_STAT_MODE_SETGID;
 758    }
 759
 760    if (stbuf->st_mode & S_ISVTX) {
 761        mode |= P9_STAT_MODE_SETVTX;
 762    }
 763
 764    return mode;
 765}
 766
 767static int stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name,
 768                            const struct stat *stbuf,
 769                            V9fsStat *v9stat)
 770{
 771    int err;
 772    const char *str;
 773
 774    memset(v9stat, 0, sizeof(*v9stat));
 775
 776    stat_to_qid(stbuf, &v9stat->qid);
 777    v9stat->mode = stat_to_v9mode(stbuf);
 778    v9stat->atime = stbuf->st_atime;
 779    v9stat->mtime = stbuf->st_mtime;
 780    v9stat->length = stbuf->st_size;
 781
 782    v9fs_string_null(&v9stat->uid);
 783    v9fs_string_null(&v9stat->gid);
 784    v9fs_string_null(&v9stat->muid);
 785
 786    v9stat->n_uid = stbuf->st_uid;
 787    v9stat->n_gid = stbuf->st_gid;
 788    v9stat->n_muid = 0;
 789
 790    v9fs_string_null(&v9stat->extension);
 791
 792    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
 793        err = v9fs_co_readlink(pdu, name, &v9stat->extension);
 794        if (err < 0) {
 795            return err;
 796        }
 797    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
 798        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
 799                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
 800                major(stbuf->st_rdev), minor(stbuf->st_rdev));
 801    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
 802        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
 803                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
 804    }
 805
 806    str = strrchr(name->data, '/');
 807    if (str) {
 808        str += 1;
 809    } else {
 810        str = name->data;
 811    }
 812
 813    v9fs_string_sprintf(&v9stat->name, "%s", str);
 814
 815    v9stat->size = 61 +
 816        v9fs_string_size(&v9stat->name) +
 817        v9fs_string_size(&v9stat->uid) +
 818        v9fs_string_size(&v9stat->gid) +
 819        v9fs_string_size(&v9stat->muid) +
 820        v9fs_string_size(&v9stat->extension);
 821    return 0;
 822}
 823
/*
 * Field-selection bits for getattr: tested against the request mask the
 * client sends and reported back in st_result_mask.
 */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

/* Fields beyond the basic set. */
#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
 842
 843
 844static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
 845                                V9fsStatDotl *v9lstat)
 846{
 847    memset(v9lstat, 0, sizeof(*v9lstat));
 848
 849    v9lstat->st_mode = stbuf->st_mode;
 850    v9lstat->st_nlink = stbuf->st_nlink;
 851    v9lstat->st_uid = stbuf->st_uid;
 852    v9lstat->st_gid = stbuf->st_gid;
 853    v9lstat->st_rdev = stbuf->st_rdev;
 854    v9lstat->st_size = stbuf->st_size;
 855    v9lstat->st_blksize = stbuf->st_blksize;
 856    v9lstat->st_blocks = stbuf->st_blocks;
 857    v9lstat->st_atime_sec = stbuf->st_atime;
 858    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
 859    v9lstat->st_mtime_sec = stbuf->st_mtime;
 860    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
 861    v9lstat->st_ctime_sec = stbuf->st_ctime;
 862    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
 863    /* Currently we only support BASIC fields in stat */
 864    v9lstat->st_result_mask = P9_STATS_BASIC;
 865
 866    stat_to_qid(stbuf, &v9lstat->qid);
 867}
 868
 869static void print_sg(struct iovec *sg, int cnt)
 870{
 871    int i;
 872
 873    printf("sg[%d]: {", cnt);
 874    for (i = 0; i < cnt; i++) {
 875        if (i) {
 876            printf(", ");
 877        }
 878        printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len);
 879    }
 880    printf("}\n");
 881}
 882
 883/* Will call this only for path name based fid */
 884static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
 885{
 886    V9fsPath str;
 887    v9fs_path_init(&str);
 888    v9fs_path_copy(&str, dst);
 889    v9fs_string_sprintf((V9fsString *)dst, "%s%s", src->data, str.data+len);
 890    v9fs_path_free(&str);
 891    /* +1 to include terminating NULL */
 892    dst->size++;
 893}
 894
 895static inline bool is_ro_export(FsContext *ctx)
 896{
 897    return ctx->export_flags & V9FS_RDONLY;
 898}
 899
 900static void v9fs_version(void *opaque)
 901{
 902    ssize_t err;
 903    V9fsPDU *pdu = opaque;
 904    V9fsState *s = pdu->s;
 905    V9fsString version;
 906    size_t offset = 7;
 907
 908    v9fs_string_init(&version);
 909    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
 910    if (err < 0) {
 911        offset = err;
 912        goto out;
 913    }
 914    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
 915
 916    virtfs_reset(pdu);
 917
 918    if (!strcmp(version.data, "9P2000.u")) {
 919        s->proto_version = V9FS_PROTO_2000U;
 920    } else if (!strcmp(version.data, "9P2000.L")) {
 921        s->proto_version = V9FS_PROTO_2000L;
 922    } else {
 923        v9fs_string_sprintf(&version, "unknown");
 924    }
 925
 926    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
 927    if (err < 0) {
 928        offset = err;
 929        goto out;
 930    }
 931    offset += err;
 932    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
 933out:
 934    complete_pdu(s, pdu, offset);
 935    v9fs_string_free(&version);
 936}
 937
 938static void v9fs_attach(void *opaque)
 939{
 940    V9fsPDU *pdu = opaque;
 941    V9fsState *s = pdu->s;
 942    int32_t fid, afid, n_uname;
 943    V9fsString uname, aname;
 944    V9fsFidState *fidp;
 945    size_t offset = 7;
 946    V9fsQID qid;
 947    ssize_t err;
 948
 949    v9fs_string_init(&uname);
 950    v9fs_string_init(&aname);
 951    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
 952                        &afid, &uname, &aname, &n_uname);
 953    if (err < 0) {
 954        goto out_nofid;
 955    }
 956    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
 957
 958    fidp = alloc_fid(s, fid);
 959    if (fidp == NULL) {
 960        err = -EINVAL;
 961        goto out_nofid;
 962    }
 963    fidp->uid = n_uname;
 964    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
 965    if (err < 0) {
 966        err = -EINVAL;
 967        clunk_fid(s, fid);
 968        goto out;
 969    }
 970    err = fid_to_qid(pdu, fidp, &qid);
 971    if (err < 0) {
 972        err = -EINVAL;
 973        clunk_fid(s, fid);
 974        goto out;
 975    }
 976    err = pdu_marshal(pdu, offset, "Q", &qid);
 977    if (err < 0) {
 978        clunk_fid(s, fid);
 979        goto out;
 980    }
 981    err += offset;
 982    trace_v9fs_attach_return(pdu->tag, pdu->id,
 983                             qid.type, qid.version, qid.path);
 984    /*
 985     * disable migration if we haven't done already.
 986     * attach could get called multiple times for the same export.
 987     */
 988    if (!s->migration_blocker) {
 989        s->root_fid = fid;
 990        error_set(&s->migration_blocker, QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION,
 991                  s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
 992        migrate_add_blocker(s->migration_blocker);
 993    }
 994out:
 995    put_fid(pdu, fidp);
 996out_nofid:
 997    complete_pdu(s, pdu, err);
 998    v9fs_string_free(&uname);
 999    v9fs_string_free(&aname);
1000}
1001
1002static void v9fs_stat(void *opaque)
1003{
1004    int32_t fid;
1005    V9fsStat v9stat;
1006    ssize_t err = 0;
1007    size_t offset = 7;
1008    struct stat stbuf;
1009    V9fsFidState *fidp;
1010    V9fsPDU *pdu = opaque;
1011    V9fsState *s = pdu->s;
1012
1013    err = pdu_unmarshal(pdu, offset, "d", &fid);
1014    if (err < 0) {
1015        goto out_nofid;
1016    }
1017    trace_v9fs_stat(pdu->tag, pdu->id, fid);
1018
1019    fidp = get_fid(pdu, fid);
1020    if (fidp == NULL) {
1021        err = -ENOENT;
1022        goto out_nofid;
1023    }
1024    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1025    if (err < 0) {
1026        goto out;
1027    }
1028    err = stat_to_v9stat(pdu, &fidp->path, &stbuf, &v9stat);
1029    if (err < 0) {
1030        goto out;
1031    }
1032    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1033    if (err < 0) {
1034        v9fs_stat_free(&v9stat);
1035        goto out;
1036    }
1037    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1038                           v9stat.atime, v9stat.mtime, v9stat.length);
1039    err += offset;
1040    v9fs_stat_free(&v9stat);
1041out:
1042    put_fid(pdu, fidp);
1043out_nofid:
1044    complete_pdu(s, pdu, err);
1045}
1046
1047static void v9fs_getattr(void *opaque)
1048{
1049    int32_t fid;
1050    size_t offset = 7;
1051    ssize_t retval = 0;
1052    struct stat stbuf;
1053    V9fsFidState *fidp;
1054    uint64_t request_mask;
1055    V9fsStatDotl v9stat_dotl;
1056    V9fsPDU *pdu = opaque;
1057    V9fsState *s = pdu->s;
1058
1059    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1060    if (retval < 0) {
1061        goto out_nofid;
1062    }
1063    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1064
1065    fidp = get_fid(pdu, fid);
1066    if (fidp == NULL) {
1067        retval = -ENOENT;
1068        goto out_nofid;
1069    }
1070    /*
1071     * Currently we only support BASIC fields in stat, so there is no
1072     * need to look at request_mask.
1073     */
1074    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1075    if (retval < 0) {
1076        goto out;
1077    }
1078    stat_to_v9stat_dotl(s, &stbuf, &v9stat_dotl);
1079
1080    /*  fill st_gen if requested and supported by underlying fs */
1081    if (request_mask & P9_STATS_GEN) {
1082        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1083        if (retval < 0) {
1084            goto out;
1085        }
1086        v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1087    }
1088    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1089    if (retval < 0) {
1090        goto out;
1091    }
1092    retval += offset;
1093    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1094                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1095                              v9stat_dotl.st_gid);
1096out:
1097    put_fid(pdu, fidp);
1098out_nofid:
1099    complete_pdu(s, pdu, retval);
1100}
1101
/*
 * Attribute flags.
 *
 * Bits tested against V9fsIattr.valid in v9fs_setattr() to decide which
 * attributes carried by the request are to be applied.
 */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
/* With ATIME/MTIME: use the timestamp from the request, not "now" */
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* Low seven bits (MODE through CTIME); the *_SET bits lie outside the mask */
#define P9_ATTR_MASK    127
1114
1115static void v9fs_setattr(void *opaque)
1116{
1117    int err = 0;
1118    int32_t fid;
1119    V9fsFidState *fidp;
1120    size_t offset = 7;
1121    V9fsIattr v9iattr;
1122    V9fsPDU *pdu = opaque;
1123    V9fsState *s = pdu->s;
1124
1125    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1126    if (err < 0) {
1127        goto out_nofid;
1128    }
1129
1130    fidp = get_fid(pdu, fid);
1131    if (fidp == NULL) {
1132        err = -EINVAL;
1133        goto out_nofid;
1134    }
1135    if (v9iattr.valid & P9_ATTR_MODE) {
1136        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1137        if (err < 0) {
1138            goto out;
1139        }
1140    }
1141    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1142        struct timespec times[2];
1143        if (v9iattr.valid & P9_ATTR_ATIME) {
1144            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1145                times[0].tv_sec = v9iattr.atime_sec;
1146                times[0].tv_nsec = v9iattr.atime_nsec;
1147            } else {
1148                times[0].tv_nsec = UTIME_NOW;
1149            }
1150        } else {
1151            times[0].tv_nsec = UTIME_OMIT;
1152        }
1153        if (v9iattr.valid & P9_ATTR_MTIME) {
1154            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1155                times[1].tv_sec = v9iattr.mtime_sec;
1156                times[1].tv_nsec = v9iattr.mtime_nsec;
1157            } else {
1158                times[1].tv_nsec = UTIME_NOW;
1159            }
1160        } else {
1161            times[1].tv_nsec = UTIME_OMIT;
1162        }
1163        err = v9fs_co_utimensat(pdu, &fidp->path, times);
1164        if (err < 0) {
1165            goto out;
1166        }
1167    }
1168    /*
1169     * If the only valid entry in iattr is ctime we can call
1170     * chown(-1,-1) to update the ctime of the file
1171     */
1172    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1173        ((v9iattr.valid & P9_ATTR_CTIME)
1174         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1175        if (!(v9iattr.valid & P9_ATTR_UID)) {
1176            v9iattr.uid = -1;
1177        }
1178        if (!(v9iattr.valid & P9_ATTR_GID)) {
1179            v9iattr.gid = -1;
1180        }
1181        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1182                            v9iattr.gid);
1183        if (err < 0) {
1184            goto out;
1185        }
1186    }
1187    if (v9iattr.valid & (P9_ATTR_SIZE)) {
1188        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1189        if (err < 0) {
1190            goto out;
1191        }
1192    }
1193    err = offset;
1194out:
1195    put_fid(pdu, fidp);
1196out_nofid:
1197    complete_pdu(s, pdu, err);
1198}
1199
1200static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1201{
1202    int i;
1203    ssize_t err;
1204    size_t offset = 7;
1205
1206    err = pdu_marshal(pdu, offset, "w", nwnames);
1207    if (err < 0) {
1208        return err;
1209    }
1210    offset += err;
1211    for (i = 0; i < nwnames; i++) {
1212        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1213        if (err < 0) {
1214            return err;
1215        }
1216        offset += err;
1217    }
1218    return offset;
1219}
1220
1221static void v9fs_walk(void *opaque)
1222{
1223    int name_idx;
1224    V9fsQID *qids = NULL;
1225    int i, err = 0;
1226    V9fsPath dpath, path;
1227    uint16_t nwnames;
1228    struct stat stbuf;
1229    size_t offset = 7;
1230    int32_t fid, newfid;
1231    V9fsString *wnames = NULL;
1232    V9fsFidState *fidp;
1233    V9fsFidState *newfidp = NULL;
1234    V9fsPDU *pdu = opaque;
1235    V9fsState *s = pdu->s;
1236
1237    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1238    if (err < 0) {
1239        complete_pdu(s, pdu, err);
1240        return ;
1241    }
1242    offset += err;
1243
1244    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1245
1246    if (nwnames && nwnames <= P9_MAXWELEM) {
1247        wnames = g_malloc0(sizeof(wnames[0]) * nwnames);
1248        qids   = g_malloc0(sizeof(qids[0]) * nwnames);
1249        for (i = 0; i < nwnames; i++) {
1250            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1251            if (err < 0) {
1252                goto out_nofid;
1253            }
1254            offset += err;
1255        }
1256    } else if (nwnames > P9_MAXWELEM) {
1257        err = -EINVAL;
1258        goto out_nofid;
1259    }
1260    fidp = get_fid(pdu, fid);
1261    if (fidp == NULL) {
1262        err = -ENOENT;
1263        goto out_nofid;
1264    }
1265    v9fs_path_init(&dpath);
1266    v9fs_path_init(&path);
1267    /*
1268     * Both dpath and path initially poin to fidp.
1269     * Needed to handle request with nwnames == 0
1270     */
1271    v9fs_path_copy(&dpath, &fidp->path);
1272    v9fs_path_copy(&path, &fidp->path);
1273    for (name_idx = 0; name_idx < nwnames; name_idx++) {
1274        err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, &path);
1275        if (err < 0) {
1276            goto out;
1277        }
1278        err = v9fs_co_lstat(pdu, &path, &stbuf);
1279        if (err < 0) {
1280            goto out;
1281        }
1282        stat_to_qid(&stbuf, &qids[name_idx]);
1283        v9fs_path_copy(&dpath, &path);
1284    }
1285    if (fid == newfid) {
1286        BUG_ON(fidp->fid_type != P9_FID_NONE);
1287        v9fs_path_copy(&fidp->path, &path);
1288    } else {
1289        newfidp = alloc_fid(s, newfid);
1290        if (newfidp == NULL) {
1291            err = -EINVAL;
1292            goto out;
1293        }
1294        newfidp->uid = fidp->uid;
1295        v9fs_path_copy(&newfidp->path, &path);
1296    }
1297    err = v9fs_walk_marshal(pdu, nwnames, qids);
1298    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1299out:
1300    put_fid(pdu, fidp);
1301    if (newfidp) {
1302        put_fid(pdu, newfidp);
1303    }
1304    v9fs_path_free(&dpath);
1305    v9fs_path_free(&path);
1306out_nofid:
1307    complete_pdu(s, pdu, err);
1308    if (nwnames && nwnames <= P9_MAXWELEM) {
1309        for (name_idx = 0; name_idx < nwnames; name_idx++) {
1310            v9fs_string_free(&wnames[name_idx]);
1311        }
1312        g_free(wnames);
1313        g_free(qids);
1314    }
1315}
1316
1317static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
1318{
1319    struct statfs stbuf;
1320    int32_t iounit = 0;
1321    V9fsState *s = pdu->s;
1322
1323    /*
1324     * iounit should be multiples of f_bsize (host filesystem block size
1325     * and as well as less than (client msize - P9_IOHDRSZ))
1326     */
1327    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1328        iounit = stbuf.f_bsize;
1329        iounit *= (s->msize - P9_IOHDRSZ)/stbuf.f_bsize;
1330    }
1331    if (!iounit) {
1332        iounit = s->msize - P9_IOHDRSZ;
1333    }
1334    return iounit;
1335}
1336
1337static void v9fs_open(void *opaque)
1338{
1339    int flags;
1340    int32_t fid;
1341    int32_t mode;
1342    V9fsQID qid;
1343    int iounit = 0;
1344    ssize_t err = 0;
1345    size_t offset = 7;
1346    struct stat stbuf;
1347    V9fsFidState *fidp;
1348    V9fsPDU *pdu = opaque;
1349    V9fsState *s = pdu->s;
1350
1351    if (s->proto_version == V9FS_PROTO_2000L) {
1352        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1353    } else {
1354        uint8_t modebyte;
1355        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1356        mode = modebyte;
1357    }
1358    if (err < 0) {
1359        goto out_nofid;
1360    }
1361    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1362
1363    fidp = get_fid(pdu, fid);
1364    if (fidp == NULL) {
1365        err = -ENOENT;
1366        goto out_nofid;
1367    }
1368    BUG_ON(fidp->fid_type != P9_FID_NONE);
1369
1370    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1371    if (err < 0) {
1372        goto out;
1373    }
1374    stat_to_qid(&stbuf, &qid);
1375    if (S_ISDIR(stbuf.st_mode)) {
1376        err = v9fs_co_opendir(pdu, fidp);
1377        if (err < 0) {
1378            goto out;
1379        }
1380        fidp->fid_type = P9_FID_DIR;
1381        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1382        if (err < 0) {
1383            goto out;
1384        }
1385        err += offset;
1386    } else {
1387        if (s->proto_version == V9FS_PROTO_2000L) {
1388            flags = get_dotl_openflags(s, mode);
1389        } else {
1390            flags = omode_to_uflags(mode);
1391        }
1392        if (is_ro_export(&s->ctx)) {
1393            if (mode & O_WRONLY || mode & O_RDWR ||
1394                mode & O_APPEND || mode & O_TRUNC) {
1395                err = -EROFS;
1396                goto out;
1397            }
1398        }
1399        err = v9fs_co_open(pdu, fidp, flags);
1400        if (err < 0) {
1401            goto out;
1402        }
1403        fidp->fid_type = P9_FID_FILE;
1404        fidp->open_flags = flags;
1405        if (flags & O_EXCL) {
1406            /*
1407             * We let the host file system do O_EXCL check
1408             * We should not reclaim such fd
1409             */
1410            fidp->flags |= FID_NON_RECLAIMABLE;
1411        }
1412        iounit = get_iounit(pdu, &fidp->path);
1413        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1414        if (err < 0) {
1415            goto out;
1416        }
1417        err += offset;
1418    }
1419    trace_v9fs_open_return(pdu->tag, pdu->id,
1420                           qid.type, qid.version, qid.path, iounit);
1421out:
1422    put_fid(pdu, fidp);
1423out_nofid:
1424    complete_pdu(s, pdu, err);
1425}
1426
1427static void v9fs_lcreate(void *opaque)
1428{
1429    int32_t dfid, flags, mode;
1430    gid_t gid;
1431    ssize_t err = 0;
1432    ssize_t offset = 7;
1433    V9fsString name;
1434    V9fsFidState *fidp;
1435    struct stat stbuf;
1436    V9fsQID qid;
1437    int32_t iounit;
1438    V9fsPDU *pdu = opaque;
1439
1440    v9fs_string_init(&name);
1441    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1442                        &name, &flags, &mode, &gid);
1443    if (err < 0) {
1444        goto out_nofid;
1445    }
1446    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1447
1448    fidp = get_fid(pdu, dfid);
1449    if (fidp == NULL) {
1450        err = -ENOENT;
1451        goto out_nofid;
1452    }
1453
1454    flags = get_dotl_openflags(pdu->s, flags);
1455    err = v9fs_co_open2(pdu, fidp, &name, gid,
1456                        flags | O_CREAT, mode, &stbuf);
1457    if (err < 0) {
1458        goto out;
1459    }
1460    fidp->fid_type = P9_FID_FILE;
1461    fidp->open_flags = flags;
1462    if (flags & O_EXCL) {
1463        /*
1464         * We let the host file system do O_EXCL check
1465         * We should not reclaim such fd
1466         */
1467        fidp->flags |= FID_NON_RECLAIMABLE;
1468    }
1469    iounit =  get_iounit(pdu, &fidp->path);
1470    stat_to_qid(&stbuf, &qid);
1471    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1472    if (err < 0) {
1473        goto out;
1474    }
1475    err += offset;
1476    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
1477                              qid.type, qid.version, qid.path, iounit);
1478out:
1479    put_fid(pdu, fidp);
1480out_nofid:
1481    complete_pdu(pdu->s, pdu, err);
1482    v9fs_string_free(&name);
1483}
1484
1485static void v9fs_fsync(void *opaque)
1486{
1487    int err;
1488    int32_t fid;
1489    int datasync;
1490    size_t offset = 7;
1491    V9fsFidState *fidp;
1492    V9fsPDU *pdu = opaque;
1493    V9fsState *s = pdu->s;
1494
1495    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
1496    if (err < 0) {
1497        goto out_nofid;
1498    }
1499    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
1500
1501    fidp = get_fid(pdu, fid);
1502    if (fidp == NULL) {
1503        err = -ENOENT;
1504        goto out_nofid;
1505    }
1506    err = v9fs_co_fsync(pdu, fidp, datasync);
1507    if (!err) {
1508        err = offset;
1509    }
1510    put_fid(pdu, fidp);
1511out_nofid:
1512    complete_pdu(s, pdu, err);
1513}
1514
1515static void v9fs_clunk(void *opaque)
1516{
1517    int err;
1518    int32_t fid;
1519    size_t offset = 7;
1520    V9fsFidState *fidp;
1521    V9fsPDU *pdu = opaque;
1522    V9fsState *s = pdu->s;
1523
1524    err = pdu_unmarshal(pdu, offset, "d", &fid);
1525    if (err < 0) {
1526        goto out_nofid;
1527    }
1528    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
1529
1530    fidp = clunk_fid(s, fid);
1531    if (fidp == NULL) {
1532        err = -ENOENT;
1533        goto out_nofid;
1534    }
1535    /*
1536     * Bump the ref so that put_fid will
1537     * free the fid.
1538     */
1539    fidp->ref++;
1540    err = put_fid(pdu, fidp);
1541    if (!err) {
1542        err = offset;
1543    }
1544out_nofid:
1545    complete_pdu(s, pdu, err);
1546}
1547
1548static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1549                           uint64_t off, uint32_t max_count)
1550{
1551    ssize_t err;
1552    size_t offset = 7;
1553    int read_count;
1554    int64_t xattr_len;
1555
1556    xattr_len = fidp->fs.xattr.len;
1557    read_count = xattr_len - off;
1558    if (read_count > max_count) {
1559        read_count = max_count;
1560    } else if (read_count < 0) {
1561        /*
1562         * read beyond XATTR value
1563         */
1564        read_count = 0;
1565    }
1566    err = pdu_marshal(pdu, offset, "d", read_count);
1567    if (err < 0) {
1568        return err;
1569    }
1570    offset += err;
1571    err = v9fs_pack(pdu->elem.in_sg, pdu->elem.in_num, offset,
1572                    ((char *)fidp->fs.xattr.value) + off,
1573                    read_count);
1574    if (err < 0) {
1575        return err;
1576    }
1577    offset += err;
1578    return offset;
1579}
1580
1581static int v9fs_do_readdir_with_stat(V9fsPDU *pdu,
1582                                     V9fsFidState *fidp, uint32_t max_count)
1583{
1584    V9fsPath path;
1585    V9fsStat v9stat;
1586    int len, err = 0;
1587    int32_t count = 0;
1588    struct stat stbuf;
1589    off_t saved_dir_pos;
1590    struct dirent *dent, *result;
1591
1592    /* save the directory position */
1593    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1594    if (saved_dir_pos < 0) {
1595        return saved_dir_pos;
1596    }
1597
1598    dent = g_malloc(sizeof(struct dirent));
1599
1600    while (1) {
1601        v9fs_path_init(&path);
1602        err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1603        if (err || !result) {
1604            break;
1605        }
1606        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
1607        if (err < 0) {
1608            goto out;
1609        }
1610        err = v9fs_co_lstat(pdu, &path, &stbuf);
1611        if (err < 0) {
1612            goto out;
1613        }
1614        err = stat_to_v9stat(pdu, &path, &stbuf, &v9stat);
1615        if (err < 0) {
1616            goto out;
1617        }
1618        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1619        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
1620        if ((len != (v9stat.size + 2)) || ((count + len) > max_count)) {
1621            /* Ran out of buffer. Set dir back to old position and return */
1622            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1623            v9fs_stat_free(&v9stat);
1624            v9fs_path_free(&path);
1625            g_free(dent);
1626            return count;
1627        }
1628        count += len;
1629        v9fs_stat_free(&v9stat);
1630        v9fs_path_free(&path);
1631        saved_dir_pos = dent->d_off;
1632    }
1633out:
1634    g_free(dent);
1635    v9fs_path_free(&path);
1636    if (err < 0) {
1637        return err;
1638    }
1639    return count;
1640}
1641
1642/*
1643 * Create a QEMUIOVector for a sub-region of PDU iovecs
1644 *
1645 * @qiov:       uninitialized QEMUIOVector
1646 * @skip:       number of bytes to skip from beginning of PDU
1647 * @size:       number of bytes to include
1648 * @is_write:   true - write, false - read
1649 *
1650 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
1651 * with qemu_iovec_destroy().
1652 */
1653static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
1654                                    size_t skip, size_t size,
1655                                    bool is_write)
1656{
1657    QEMUIOVector elem;
1658    struct iovec *iov;
1659    unsigned int niov;
1660
1661    if (is_write) {
1662        iov = pdu->elem.out_sg;
1663        niov = pdu->elem.out_num;
1664    } else {
1665        iov = pdu->elem.in_sg;
1666        niov = pdu->elem.in_num;
1667    }
1668
1669    qemu_iovec_init_external(&elem, iov, niov);
1670    qemu_iovec_init(qiov, niov);
1671    qemu_iovec_concat(qiov, &elem, skip, size);
1672}
1673
1674static void v9fs_read(void *opaque)
1675{
1676    int32_t fid;
1677    uint64_t off;
1678    ssize_t err = 0;
1679    int32_t count = 0;
1680    size_t offset = 7;
1681    uint32_t max_count;
1682    V9fsFidState *fidp;
1683    V9fsPDU *pdu = opaque;
1684    V9fsState *s = pdu->s;
1685
1686    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
1687    if (err < 0) {
1688        goto out_nofid;
1689    }
1690    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
1691
1692    fidp = get_fid(pdu, fid);
1693    if (fidp == NULL) {
1694        err = -EINVAL;
1695        goto out_nofid;
1696    }
1697    if (fidp->fid_type == P9_FID_DIR) {
1698
1699        if (off == 0) {
1700            v9fs_co_rewinddir(pdu, fidp);
1701        }
1702        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
1703        if (count < 0) {
1704            err = count;
1705            goto out;
1706        }
1707        err = pdu_marshal(pdu, offset, "d", count);
1708        if (err < 0) {
1709            goto out;
1710        }
1711        err += offset + count;
1712    } else if (fidp->fid_type == P9_FID_FILE) {
1713        QEMUIOVector qiov_full;
1714        QEMUIOVector qiov;
1715        int32_t len;
1716
1717        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
1718        qemu_iovec_init(&qiov, qiov_full.niov);
1719        do {
1720            qemu_iovec_reset(&qiov);
1721            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
1722            if (0) {
1723                print_sg(qiov.iov, qiov.niov);
1724            }
1725            /* Loop in case of EINTR */
1726            do {
1727                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
1728                if (len >= 0) {
1729                    off   += len;
1730                    count += len;
1731                }
1732            } while (len == -EINTR && !pdu->cancelled);
1733            if (len < 0) {
1734                /* IO error return the error */
1735                err = len;
1736                goto out;
1737            }
1738        } while (count < max_count && len > 0);
1739        err = pdu_marshal(pdu, offset, "d", count);
1740        if (err < 0) {
1741            goto out;
1742        }
1743        err += offset + count;
1744        qemu_iovec_destroy(&qiov);
1745        qemu_iovec_destroy(&qiov_full);
1746    } else if (fidp->fid_type == P9_FID_XATTR) {
1747        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
1748    } else {
1749        err = -EINVAL;
1750    }
1751    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
1752out:
1753    put_fid(pdu, fidp);
1754out_nofid:
1755    complete_pdu(s, pdu, err);
1756}
1757
1758static size_t v9fs_readdir_data_size(V9fsString *name)
1759{
1760    /*
1761     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
1762     * size of type (1) + size of name.size (2) + strlen(name.data)
1763     */
1764    return 24 + v9fs_string_size(name);
1765}
1766
1767static int v9fs_do_readdir(V9fsPDU *pdu,
1768                           V9fsFidState *fidp, int32_t max_count)
1769{
1770    size_t size;
1771    V9fsQID qid;
1772    V9fsString name;
1773    int len, err = 0;
1774    int32_t count = 0;
1775    off_t saved_dir_pos;
1776    struct dirent *dent, *result;
1777
1778    /* save the directory position */
1779    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1780    if (saved_dir_pos < 0) {
1781        return saved_dir_pos;
1782    }
1783
1784    dent = g_malloc(sizeof(struct dirent));
1785
1786    while (1) {
1787        err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1788        if (err || !result) {
1789            break;
1790        }
1791        v9fs_string_init(&name);
1792        v9fs_string_sprintf(&name, "%s", dent->d_name);
1793        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
1794            /* Ran out of buffer. Set dir back to old position and return */
1795            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1796            v9fs_string_free(&name);
1797            g_free(dent);
1798            return count;
1799        }
1800        /*
1801         * Fill up just the path field of qid because the client uses
1802         * only that. To fill the entire qid structure we will have
1803         * to stat each dirent found, which is expensive
1804         */
1805        size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
1806        memcpy(&qid.path, &dent->d_ino, size);
1807        /* Fill the other fields with dummy values */
1808        qid.type = 0;
1809        qid.version = 0;
1810
1811        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1812        len = pdu_marshal(pdu, 11 + count, "Qqbs",
1813                          &qid, dent->d_off,
1814                          dent->d_type, &name);
1815        if (len < 0) {
1816            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1817            v9fs_string_free(&name);
1818            g_free(dent);
1819            return len;
1820        }
1821        count += len;
1822        v9fs_string_free(&name);
1823        saved_dir_pos = dent->d_off;
1824    }
1825    g_free(dent);
1826    if (err < 0) {
1827        return err;
1828    }
1829    return count;
1830}
1831
1832static void v9fs_readdir(void *opaque)
1833{
1834    int32_t fid;
1835    V9fsFidState *fidp;
1836    ssize_t retval = 0;
1837    size_t offset = 7;
1838    uint64_t initial_offset;
1839    int32_t count;
1840    uint32_t max_count;
1841    V9fsPDU *pdu = opaque;
1842    V9fsState *s = pdu->s;
1843
1844    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
1845                           &initial_offset, &max_count);
1846    if (retval < 0) {
1847        goto out_nofid;
1848    }
1849    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
1850
1851    fidp = get_fid(pdu, fid);
1852    if (fidp == NULL) {
1853        retval = -EINVAL;
1854        goto out_nofid;
1855    }
1856    if (!fidp->fs.dir) {
1857        retval = -EINVAL;
1858        goto out;
1859    }
1860    if (initial_offset == 0) {
1861        v9fs_co_rewinddir(pdu, fidp);
1862    } else {
1863        v9fs_co_seekdir(pdu, fidp, initial_offset);
1864    }
1865    count = v9fs_do_readdir(pdu, fidp, max_count);
1866    if (count < 0) {
1867        retval = count;
1868        goto out;
1869    }
1870    retval = pdu_marshal(pdu, offset, "d", count);
1871    if (retval < 0) {
1872        goto out;
1873    }
1874    retval += count + offset;
1875    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
1876out:
1877    put_fid(pdu, fidp);
1878out_nofid:
1879    complete_pdu(s, pdu, retval);
1880}
1881
1882static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1883                            uint64_t off, uint32_t count,
1884                            struct iovec *sg, int cnt)
1885{
1886    int i, to_copy;
1887    ssize_t err = 0;
1888    int write_count;
1889    int64_t xattr_len;
1890    size_t offset = 7;
1891
1892
1893    xattr_len = fidp->fs.xattr.len;
1894    write_count = xattr_len - off;
1895    if (write_count > count) {
1896        write_count = count;
1897    } else if (write_count < 0) {
1898        /*
1899         * write beyond XATTR value len specified in
1900         * xattrcreate
1901         */
1902        err = -ENOSPC;
1903        goto out;
1904    }
1905    err = pdu_marshal(pdu, offset, "d", write_count);
1906    if (err < 0) {
1907        return err;
1908    }
1909    err += offset;
1910    fidp->fs.xattr.copied_len += write_count;
1911    /*
1912     * Now copy the content from sg list
1913     */
1914    for (i = 0; i < cnt; i++) {
1915        if (write_count > sg[i].iov_len) {
1916            to_copy = sg[i].iov_len;
1917        } else {
1918            to_copy = write_count;
1919        }
1920        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
1921        /* updating vs->off since we are not using below */
1922        off += to_copy;
1923        write_count -= to_copy;
1924    }
1925out:
1926    return err;
1927}
1928
1929static void v9fs_write(void *opaque)
1930{
1931    ssize_t err;
1932    int32_t fid;
1933    uint64_t off;
1934    uint32_t count;
1935    int32_t len = 0;
1936    int32_t total = 0;
1937    size_t offset = 7;
1938    V9fsFidState *fidp;
1939    V9fsPDU *pdu = opaque;
1940    V9fsState *s = pdu->s;
1941    QEMUIOVector qiov_full;
1942    QEMUIOVector qiov;
1943
1944    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
1945    if (err < 0) {
1946        return complete_pdu(s, pdu, err);
1947    }
1948    offset += err;
1949    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
1950    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
1951
1952    fidp = get_fid(pdu, fid);
1953    if (fidp == NULL) {
1954        err = -EINVAL;
1955        goto out_nofid;
1956    }
1957    if (fidp->fid_type == P9_FID_FILE) {
1958        if (fidp->fs.fd == -1) {
1959            err = -EINVAL;
1960            goto out;
1961        }
1962    } else if (fidp->fid_type == P9_FID_XATTR) {
1963        /*
1964         * setxattr operation
1965         */
1966        err = v9fs_xattr_write(s, pdu, fidp, off, count,
1967                               qiov_full.iov, qiov_full.niov);
1968        goto out;
1969    } else {
1970        err = -EINVAL;
1971        goto out;
1972    }
1973    qemu_iovec_init(&qiov, qiov_full.niov);
1974    do {
1975        qemu_iovec_reset(&qiov);
1976        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
1977        if (0) {
1978            print_sg(qiov.iov, qiov.niov);
1979        }
1980        /* Loop in case of EINTR */
1981        do {
1982            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
1983            if (len >= 0) {
1984                off   += len;
1985                total += len;
1986            }
1987        } while (len == -EINTR && !pdu->cancelled);
1988        if (len < 0) {
1989            /* IO error return the error */
1990            err = len;
1991            goto out_qiov;
1992        }
1993    } while (total < count && len > 0);
1994
1995    offset = 7;
1996    err = pdu_marshal(pdu, offset, "d", total);
1997    if (err < 0) {
1998        goto out;
1999    }
2000    err += offset;
2001    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2002out_qiov:
2003    qemu_iovec_destroy(&qiov);
2004out:
2005    put_fid(pdu, fidp);
2006out_nofid:
2007    qemu_iovec_destroy(&qiov_full);
2008    complete_pdu(s, pdu, err);
2009}
2010
2011static void v9fs_create(void *opaque)
2012{
2013    int32_t fid;
2014    int err = 0;
2015    size_t offset = 7;
2016    V9fsFidState *fidp;
2017    V9fsQID qid;
2018    int32_t perm;
2019    int8_t mode;
2020    V9fsPath path;
2021    struct stat stbuf;
2022    V9fsString name;
2023    V9fsString extension;
2024    int iounit;
2025    V9fsPDU *pdu = opaque;
2026
2027    v9fs_path_init(&path);
2028    v9fs_string_init(&name);
2029    v9fs_string_init(&extension);
2030    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2031                        &perm, &mode, &extension);
2032    if (err < 0) {
2033        goto out_nofid;
2034    }
2035    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2036
2037    fidp = get_fid(pdu, fid);
2038    if (fidp == NULL) {
2039        err = -EINVAL;
2040        goto out_nofid;
2041    }
2042    if (perm & P9_STAT_MODE_DIR) {
2043        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2044                            fidp->uid, -1, &stbuf);
2045        if (err < 0) {
2046            goto out;
2047        }
2048        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2049        if (err < 0) {
2050            goto out;
2051        }
2052        v9fs_path_copy(&fidp->path, &path);
2053        err = v9fs_co_opendir(pdu, fidp);
2054        if (err < 0) {
2055            goto out;
2056        }
2057        fidp->fid_type = P9_FID_DIR;
2058    } else if (perm & P9_STAT_MODE_SYMLINK) {
2059        err = v9fs_co_symlink(pdu, fidp, &name,
2060                              extension.data, -1 , &stbuf);
2061        if (err < 0) {
2062            goto out;
2063        }
2064        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2065        if (err < 0) {
2066            goto out;
2067        }
2068        v9fs_path_copy(&fidp->path, &path);
2069    } else if (perm & P9_STAT_MODE_LINK) {
2070        int32_t ofid = atoi(extension.data);
2071        V9fsFidState *ofidp = get_fid(pdu, ofid);
2072        if (ofidp == NULL) {
2073            err = -EINVAL;
2074            goto out;
2075        }
2076        err = v9fs_co_link(pdu, ofidp, fidp, &name);
2077        put_fid(pdu, ofidp);
2078        if (err < 0) {
2079            goto out;
2080        }
2081        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2082        if (err < 0) {
2083            fidp->fid_type = P9_FID_NONE;
2084            goto out;
2085        }
2086        v9fs_path_copy(&fidp->path, &path);
2087        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2088        if (err < 0) {
2089            fidp->fid_type = P9_FID_NONE;
2090            goto out;
2091        }
2092    } else if (perm & P9_STAT_MODE_DEVICE) {
2093        char ctype;
2094        uint32_t major, minor;
2095        mode_t nmode = 0;
2096
2097        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2098            err = -errno;
2099            goto out;
2100        }
2101
2102        switch (ctype) {
2103        case 'c':
2104            nmode = S_IFCHR;
2105            break;
2106        case 'b':
2107            nmode = S_IFBLK;
2108            break;
2109        default:
2110            err = -EIO;
2111            goto out;
2112        }
2113
2114        nmode |= perm & 0777;
2115        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2116                            makedev(major, minor), nmode, &stbuf);
2117        if (err < 0) {
2118            goto out;
2119        }
2120        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2121        if (err < 0) {
2122            goto out;
2123        }
2124        v9fs_path_copy(&fidp->path, &path);
2125    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2126        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2127                            0, S_IFIFO | (perm & 0777), &stbuf);
2128        if (err < 0) {
2129            goto out;
2130        }
2131        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2132        if (err < 0) {
2133            goto out;
2134        }
2135        v9fs_path_copy(&fidp->path, &path);
2136    } else if (perm & P9_STAT_MODE_SOCKET) {
2137        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2138                            0, S_IFSOCK | (perm & 0777), &stbuf);
2139        if (err < 0) {
2140            goto out;
2141        }
2142        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2143        if (err < 0) {
2144            goto out;
2145        }
2146        v9fs_path_copy(&fidp->path, &path);
2147    } else {
2148        err = v9fs_co_open2(pdu, fidp, &name, -1,
2149                            omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2150        if (err < 0) {
2151            goto out;
2152        }
2153        fidp->fid_type = P9_FID_FILE;
2154        fidp->open_flags = omode_to_uflags(mode);
2155        if (fidp->open_flags & O_EXCL) {
2156            /*
2157             * We let the host file system do O_EXCL check
2158             * We should not reclaim such fd
2159             */
2160            fidp->flags |= FID_NON_RECLAIMABLE;
2161        }
2162    }
2163    iounit = get_iounit(pdu, &fidp->path);
2164    stat_to_qid(&stbuf, &qid);
2165    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2166    if (err < 0) {
2167        goto out;
2168    }
2169    err += offset;
2170    trace_v9fs_create_return(pdu->tag, pdu->id,
2171                             qid.type, qid.version, qid.path, iounit);
2172out:
2173    put_fid(pdu, fidp);
2174out_nofid:
2175   complete_pdu(pdu->s, pdu, err);
2176   v9fs_string_free(&name);
2177   v9fs_string_free(&extension);
2178   v9fs_path_free(&path);
2179}
2180
2181static void v9fs_symlink(void *opaque)
2182{
2183    V9fsPDU *pdu = opaque;
2184    V9fsString name;
2185    V9fsString symname;
2186    V9fsFidState *dfidp;
2187    V9fsQID qid;
2188    struct stat stbuf;
2189    int32_t dfid;
2190    int err = 0;
2191    gid_t gid;
2192    size_t offset = 7;
2193
2194    v9fs_string_init(&name);
2195    v9fs_string_init(&symname);
2196    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2197    if (err < 0) {
2198        goto out_nofid;
2199    }
2200    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2201
2202    dfidp = get_fid(pdu, dfid);
2203    if (dfidp == NULL) {
2204        err = -EINVAL;
2205        goto out_nofid;
2206    }
2207    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2208    if (err < 0) {
2209        goto out;
2210    }
2211    stat_to_qid(&stbuf, &qid);
2212    err =  pdu_marshal(pdu, offset, "Q", &qid);
2213    if (err < 0) {
2214        goto out;
2215    }
2216    err += offset;
2217    trace_v9fs_symlink_return(pdu->tag, pdu->id,
2218                              qid.type, qid.version, qid.path);
2219out:
2220    put_fid(pdu, dfidp);
2221out_nofid:
2222    complete_pdu(pdu->s, pdu, err);
2223    v9fs_string_free(&name);
2224    v9fs_string_free(&symname);
2225}
2226
2227static void v9fs_flush(void *opaque)
2228{
2229    ssize_t err;
2230    int16_t tag;
2231    size_t offset = 7;
2232    V9fsPDU *cancel_pdu;
2233    V9fsPDU *pdu = opaque;
2234    V9fsState *s = pdu->s;
2235
2236    err = pdu_unmarshal(pdu, offset, "w", &tag);
2237    if (err < 0) {
2238        complete_pdu(s, pdu, err);
2239        return;
2240    }
2241    trace_v9fs_flush(pdu->tag, pdu->id, tag);
2242
2243    QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2244        if (cancel_pdu->tag == tag) {
2245            break;
2246        }
2247    }
2248    if (cancel_pdu) {
2249        cancel_pdu->cancelled = 1;
2250        /*
2251         * Wait for pdu to complete.
2252         */
2253        qemu_co_queue_wait(&cancel_pdu->complete);
2254        cancel_pdu->cancelled = 0;
2255        free_pdu(pdu->s, cancel_pdu);
2256    }
2257    complete_pdu(s, pdu, 7);
2258}
2259
2260static void v9fs_link(void *opaque)
2261{
2262    V9fsPDU *pdu = opaque;
2263    V9fsState *s = pdu->s;
2264    int32_t dfid, oldfid;
2265    V9fsFidState *dfidp, *oldfidp;
2266    V9fsString name;
2267    size_t offset = 7;
2268    int err = 0;
2269
2270    v9fs_string_init(&name);
2271    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2272    if (err < 0) {
2273        goto out_nofid;
2274    }
2275    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2276
2277    dfidp = get_fid(pdu, dfid);
2278    if (dfidp == NULL) {
2279        err = -ENOENT;
2280        goto out_nofid;
2281    }
2282
2283    oldfidp = get_fid(pdu, oldfid);
2284    if (oldfidp == NULL) {
2285        err = -ENOENT;
2286        goto out;
2287    }
2288    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2289    if (!err) {
2290        err = offset;
2291    }
2292out:
2293    put_fid(pdu, dfidp);
2294out_nofid:
2295    v9fs_string_free(&name);
2296    complete_pdu(s, pdu, err);
2297}
2298
2299/* Only works with path name based fid */
2300static void v9fs_remove(void *opaque)
2301{
2302    int32_t fid;
2303    int err = 0;
2304    size_t offset = 7;
2305    V9fsFidState *fidp;
2306    V9fsPDU *pdu = opaque;
2307
2308    err = pdu_unmarshal(pdu, offset, "d", &fid);
2309    if (err < 0) {
2310        goto out_nofid;
2311    }
2312    trace_v9fs_remove(pdu->tag, pdu->id, fid);
2313
2314    fidp = get_fid(pdu, fid);
2315    if (fidp == NULL) {
2316        err = -EINVAL;
2317        goto out_nofid;
2318    }
2319    /* if fs driver is not path based, return EOPNOTSUPP */
2320    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2321        err = -EOPNOTSUPP;
2322        goto out_err;
2323    }
2324    /*
2325     * IF the file is unlinked, we cannot reopen
2326     * the file later. So don't reclaim fd
2327     */
2328    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2329    if (err < 0) {
2330        goto out_err;
2331    }
2332    err = v9fs_co_remove(pdu, &fidp->path);
2333    if (!err) {
2334        err = offset;
2335    }
2336out_err:
2337    /* For TREMOVE we need to clunk the fid even on failed remove */
2338    clunk_fid(pdu->s, fidp->fid);
2339    put_fid(pdu, fidp);
2340out_nofid:
2341    complete_pdu(pdu->s, pdu, err);
2342}
2343
2344static void v9fs_unlinkat(void *opaque)
2345{
2346    int err = 0;
2347    V9fsString name;
2348    int32_t dfid, flags;
2349    size_t offset = 7;
2350    V9fsPath path;
2351    V9fsFidState *dfidp;
2352    V9fsPDU *pdu = opaque;
2353
2354    v9fs_string_init(&name);
2355    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
2356    if (err < 0) {
2357        goto out_nofid;
2358    }
2359    dfidp = get_fid(pdu, dfid);
2360    if (dfidp == NULL) {
2361        err = -EINVAL;
2362        goto out_nofid;
2363    }
2364    /*
2365     * IF the file is unlinked, we cannot reopen
2366     * the file later. So don't reclaim fd
2367     */
2368    v9fs_path_init(&path);
2369    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
2370    if (err < 0) {
2371        goto out_err;
2372    }
2373    err = v9fs_mark_fids_unreclaim(pdu, &path);
2374    if (err < 0) {
2375        goto out_err;
2376    }
2377    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, flags);
2378    if (!err) {
2379        err = offset;
2380    }
2381out_err:
2382    put_fid(pdu, dfidp);
2383    v9fs_path_free(&path);
2384out_nofid:
2385    complete_pdu(pdu->s, pdu, err);
2386    v9fs_string_free(&name);
2387}
2388
2389
2390/* Only works with path name based fid */
2391static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
2392                                int32_t newdirfid, V9fsString *name)
2393{
2394    char *end;
2395    int err = 0;
2396    V9fsPath new_path;
2397    V9fsFidState *tfidp;
2398    V9fsState *s = pdu->s;
2399    V9fsFidState *dirfidp = NULL;
2400    char *old_name, *new_name;
2401
2402    v9fs_path_init(&new_path);
2403    if (newdirfid != -1) {
2404        dirfidp = get_fid(pdu, newdirfid);
2405        if (dirfidp == NULL) {
2406            err = -ENOENT;
2407            goto out_nofid;
2408        }
2409        BUG_ON(dirfidp->fid_type != P9_FID_NONE);
2410        v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
2411    } else {
2412        old_name = fidp->path.data;
2413        end = strrchr(old_name, '/');
2414        if (end) {
2415            end++;
2416        } else {
2417            end = old_name;
2418        }
2419        new_name = g_malloc0(end - old_name + name->size + 1);
2420        strncat(new_name, old_name, end - old_name);
2421        strncat(new_name + (end - old_name), name->data, name->size);
2422        v9fs_co_name_to_path(pdu, NULL, new_name, &new_path);
2423        g_free(new_name);
2424    }
2425    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
2426    if (err < 0) {
2427        goto out;
2428    }
2429    /*
2430     * Fixup fid's pointing to the old name to
2431     * start pointing to the new name
2432     */
2433    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2434        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
2435            /* replace the name */
2436            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
2437        }
2438    }
2439out:
2440    if (dirfidp) {
2441        put_fid(pdu, dirfidp);
2442    }
2443    v9fs_path_free(&new_path);
2444out_nofid:
2445    return err;
2446}
2447
2448/* Only works with path name based fid */
2449static void v9fs_rename(void *opaque)
2450{
2451    int32_t fid;
2452    ssize_t err = 0;
2453    size_t offset = 7;
2454    V9fsString name;
2455    int32_t newdirfid;
2456    V9fsFidState *fidp;
2457    V9fsPDU *pdu = opaque;
2458    V9fsState *s = pdu->s;
2459
2460    v9fs_string_init(&name);
2461    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
2462    if (err < 0) {
2463        goto out_nofid;
2464    }
2465    fidp = get_fid(pdu, fid);
2466    if (fidp == NULL) {
2467        err = -ENOENT;
2468        goto out_nofid;
2469    }
2470    BUG_ON(fidp->fid_type != P9_FID_NONE);
2471    /* if fs driver is not path based, return EOPNOTSUPP */
2472    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2473        err = -EOPNOTSUPP;
2474        goto out;
2475    }
2476    v9fs_path_write_lock(s);
2477    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
2478    v9fs_path_unlock(s);
2479    if (!err) {
2480        err = offset;
2481    }
2482out:
2483    put_fid(pdu, fidp);
2484out_nofid:
2485    complete_pdu(s, pdu, err);
2486    v9fs_string_free(&name);
2487}
2488
2489static void v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
2490                               V9fsString *old_name, V9fsPath *newdir,
2491                               V9fsString *new_name)
2492{
2493    V9fsFidState *tfidp;
2494    V9fsPath oldpath, newpath;
2495    V9fsState *s = pdu->s;
2496
2497
2498    v9fs_path_init(&oldpath);
2499    v9fs_path_init(&newpath);
2500    v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
2501    v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
2502
2503    /*
2504     * Fixup fid's pointing to the old name to
2505     * start pointing to the new name
2506     */
2507    for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2508        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
2509            /* replace the name */
2510            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
2511        }
2512    }
2513    v9fs_path_free(&oldpath);
2514    v9fs_path_free(&newpath);
2515}
2516
2517static int v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
2518                                  V9fsString *old_name, int32_t newdirfid,
2519                                  V9fsString *new_name)
2520{
2521    int err = 0;
2522    V9fsState *s = pdu->s;
2523    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
2524
2525    olddirfidp = get_fid(pdu, olddirfid);
2526    if (olddirfidp == NULL) {
2527        err = -ENOENT;
2528        goto out;
2529    }
2530    if (newdirfid != -1) {
2531        newdirfidp = get_fid(pdu, newdirfid);
2532        if (newdirfidp == NULL) {
2533            err = -ENOENT;
2534            goto out;
2535        }
2536    } else {
2537        newdirfidp = get_fid(pdu, olddirfid);
2538    }
2539
2540    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
2541                           &newdirfidp->path, new_name);
2542    if (err < 0) {
2543        goto out;
2544    }
2545    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
2546        /* Only for path based fid  we need to do the below fixup */
2547        v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
2548                           &newdirfidp->path, new_name);
2549    }
2550out:
2551    if (olddirfidp) {
2552        put_fid(pdu, olddirfidp);
2553    }
2554    if (newdirfidp) {
2555        put_fid(pdu, newdirfidp);
2556    }
2557    return err;
2558}
2559
2560static void v9fs_renameat(void *opaque)
2561{
2562    ssize_t err = 0;
2563    size_t offset = 7;
2564    V9fsPDU *pdu = opaque;
2565    V9fsState *s = pdu->s;
2566    int32_t olddirfid, newdirfid;
2567    V9fsString old_name, new_name;
2568
2569    v9fs_string_init(&old_name);
2570    v9fs_string_init(&new_name);
2571    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
2572                        &old_name, &newdirfid, &new_name);
2573    if (err < 0) {
2574        goto out_err;
2575    }
2576
2577    v9fs_path_write_lock(s);
2578    err = v9fs_complete_renameat(pdu, olddirfid,
2579                                 &old_name, newdirfid, &new_name);
2580    v9fs_path_unlock(s);
2581    if (!err) {
2582        err = offset;
2583    }
2584
2585out_err:
2586    complete_pdu(s, pdu, err);
2587    v9fs_string_free(&old_name);
2588    v9fs_string_free(&new_name);
2589}
2590
2591static void v9fs_wstat(void *opaque)
2592{
2593    int32_t fid;
2594    int err = 0;
2595    int16_t unused;
2596    V9fsStat v9stat;
2597    size_t offset = 7;
2598    struct stat stbuf;
2599    V9fsFidState *fidp;
2600    V9fsPDU *pdu = opaque;
2601    V9fsState *s = pdu->s;
2602
2603    v9fs_stat_init(&v9stat);
2604    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
2605    if (err < 0) {
2606        goto out_nofid;
2607    }
2608    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
2609                     v9stat.mode, v9stat.atime, v9stat.mtime);
2610
2611    fidp = get_fid(pdu, fid);
2612    if (fidp == NULL) {
2613        err = -EINVAL;
2614        goto out_nofid;
2615    }
2616    /* do we need to sync the file? */
2617    if (donttouch_stat(&v9stat)) {
2618        err = v9fs_co_fsync(pdu, fidp, 0);
2619        goto out;
2620    }
2621    if (v9stat.mode != -1) {
2622        uint32_t v9_mode;
2623        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2624        if (err < 0) {
2625            goto out;
2626        }
2627        v9_mode = stat_to_v9mode(&stbuf);
2628        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
2629            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
2630            /* Attempting to change the type */
2631            err = -EIO;
2632            goto out;
2633        }
2634        err = v9fs_co_chmod(pdu, &fidp->path,
2635                            v9mode_to_mode(v9stat.mode,
2636                                           &v9stat.extension));
2637        if (err < 0) {
2638            goto out;
2639        }
2640    }
2641    if (v9stat.mtime != -1 || v9stat.atime != -1) {
2642        struct timespec times[2];
2643        if (v9stat.atime != -1) {
2644            times[0].tv_sec = v9stat.atime;
2645            times[0].tv_nsec = 0;
2646        } else {
2647            times[0].tv_nsec = UTIME_OMIT;
2648        }
2649        if (v9stat.mtime != -1) {
2650            times[1].tv_sec = v9stat.mtime;
2651            times[1].tv_nsec = 0;
2652        } else {
2653            times[1].tv_nsec = UTIME_OMIT;
2654        }
2655        err = v9fs_co_utimensat(pdu, &fidp->path, times);
2656        if (err < 0) {
2657            goto out;
2658        }
2659    }
2660    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
2661        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
2662        if (err < 0) {
2663            goto out;
2664        }
2665    }
2666    if (v9stat.name.size != 0) {
2667        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
2668        if (err < 0) {
2669            goto out;
2670        }
2671    }
2672    if (v9stat.length != -1) {
2673        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
2674        if (err < 0) {
2675            goto out;
2676        }
2677    }
2678    err = offset;
2679out:
2680    put_fid(pdu, fidp);
2681out_nofid:
2682    v9fs_stat_free(&v9stat);
2683    complete_pdu(s, pdu, err);
2684}
2685
2686static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
2687{
2688    uint32_t f_type;
2689    uint32_t f_bsize;
2690    uint64_t f_blocks;
2691    uint64_t f_bfree;
2692    uint64_t f_bavail;
2693    uint64_t f_files;
2694    uint64_t f_ffree;
2695    uint64_t fsid_val;
2696    uint32_t f_namelen;
2697    size_t offset = 7;
2698    int32_t bsize_factor;
2699
2700    /*
2701     * compute bsize factor based on host file system block size
2702     * and client msize
2703     */
2704    bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
2705    if (!bsize_factor) {
2706        bsize_factor = 1;
2707    }
2708    f_type  = stbuf->f_type;
2709    f_bsize = stbuf->f_bsize;
2710    f_bsize *= bsize_factor;
2711    /*
2712     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
2713     * adjust(divide) the number of blocks, free blocks and available
2714     * blocks by bsize factor
2715     */
2716    f_blocks = stbuf->f_blocks/bsize_factor;
2717    f_bfree  = stbuf->f_bfree/bsize_factor;
2718    f_bavail = stbuf->f_bavail/bsize_factor;
2719    f_files  = stbuf->f_files;
2720    f_ffree  = stbuf->f_ffree;
2721    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
2722               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
2723    f_namelen = stbuf->f_namelen;
2724
2725    return pdu_marshal(pdu, offset, "ddqqqqqqd",
2726                       f_type, f_bsize, f_blocks, f_bfree,
2727                       f_bavail, f_files, f_ffree,
2728                       fsid_val, f_namelen);
2729}
2730
2731static void v9fs_statfs(void *opaque)
2732{
2733    int32_t fid;
2734    ssize_t retval = 0;
2735    size_t offset = 7;
2736    V9fsFidState *fidp;
2737    struct statfs stbuf;
2738    V9fsPDU *pdu = opaque;
2739    V9fsState *s = pdu->s;
2740
2741    retval = pdu_unmarshal(pdu, offset, "d", &fid);
2742    if (retval < 0) {
2743        goto out_nofid;
2744    }
2745    fidp = get_fid(pdu, fid);
2746    if (fidp == NULL) {
2747        retval = -ENOENT;
2748        goto out_nofid;
2749    }
2750    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
2751    if (retval < 0) {
2752        goto out;
2753    }
2754    retval = v9fs_fill_statfs(s, pdu, &stbuf);
2755    if (retval < 0) {
2756        goto out;
2757    }
2758    retval += offset;
2759out:
2760    put_fid(pdu, fidp);
2761out_nofid:
2762    complete_pdu(s, pdu, retval);
2763}
2764
2765static void v9fs_mknod(void *opaque)
2766{
2767
2768    int mode;
2769    gid_t gid;
2770    int32_t fid;
2771    V9fsQID qid;
2772    int err = 0;
2773    int major, minor;
2774    size_t offset = 7;
2775    V9fsString name;
2776    struct stat stbuf;
2777    V9fsFidState *fidp;
2778    V9fsPDU *pdu = opaque;
2779    V9fsState *s = pdu->s;
2780
2781    v9fs_string_init(&name);
2782    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
2783                        &major, &minor, &gid);
2784    if (err < 0) {
2785        goto out_nofid;
2786    }
2787    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
2788
2789    fidp = get_fid(pdu, fid);
2790    if (fidp == NULL) {
2791        err = -ENOENT;
2792        goto out_nofid;
2793    }
2794    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
2795                        makedev(major, minor), mode, &stbuf);
2796    if (err < 0) {
2797        goto out;
2798    }
2799    stat_to_qid(&stbuf, &qid);
2800    err = pdu_marshal(pdu, offset, "Q", &qid);
2801    if (err < 0) {
2802        goto out;
2803    }
2804    err += offset;
2805    trace_v9fs_mknod_return(pdu->tag, pdu->id,
2806                            qid.type, qid.version, qid.path);
2807out:
2808    put_fid(pdu, fidp);
2809out_nofid:
2810    complete_pdu(s, pdu, err);
2811    v9fs_string_free(&name);
2812}
2813
2814/*
2815 * Implement posix byte range locking code
2816 * Server side handling of locking code is very simple, because 9p server in
2817 * QEMU can handle only one client. And most of the lock handling
2818 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
2819 * do any thing in * qemu 9p server side lock code path.
2820 * So when a TLOCK request comes, always return success
2821 */
2822static void v9fs_lock(void *opaque)
2823{
2824    int8_t status;
2825    V9fsFlock flock;
2826    size_t offset = 7;
2827    struct stat stbuf;
2828    V9fsFidState *fidp;
2829    int32_t fid, err = 0;
2830    V9fsPDU *pdu = opaque;
2831    V9fsState *s = pdu->s;
2832
2833    status = P9_LOCK_ERROR;
2834    v9fs_string_init(&flock.client_id);
2835    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
2836                        &flock.flags, &flock.start, &flock.length,
2837                        &flock.proc_id, &flock.client_id);
2838    if (err < 0) {
2839        goto out_nofid;
2840    }
2841    trace_v9fs_lock(pdu->tag, pdu->id, fid,
2842                    flock.type, flock.start, flock.length);
2843
2844
2845    /* We support only block flag now (that too ignored currently) */
2846    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
2847        err = -EINVAL;
2848        goto out_nofid;
2849    }
2850    fidp = get_fid(pdu, fid);
2851    if (fidp == NULL) {
2852        err = -ENOENT;
2853        goto out_nofid;
2854    }
2855    err = v9fs_co_fstat(pdu, fidp, &stbuf);
2856    if (err < 0) {
2857        goto out;
2858    }
2859    status = P9_LOCK_SUCCESS;
2860out:
2861    put_fid(pdu, fidp);
2862out_nofid:
2863    err = pdu_marshal(pdu, offset, "b", status);
2864    if (err > 0) {
2865        err += offset;
2866    }
2867    trace_v9fs_lock_return(pdu->tag, pdu->id, status);
2868    complete_pdu(s, pdu, err);
2869    v9fs_string_free(&flock.client_id);
2870}
2871
2872/*
2873 * When a TGETLOCK request comes, always return success because all lock
2874 * handling is done by client's VFS layer.
2875 */
2876static void v9fs_getlock(void *opaque)
2877{
2878    size_t offset = 7;
2879    struct stat stbuf;
2880    V9fsFidState *fidp;
2881    V9fsGetlock glock;
2882    int32_t fid, err = 0;
2883    V9fsPDU *pdu = opaque;
2884    V9fsState *s = pdu->s;
2885
2886    v9fs_string_init(&glock.client_id);
2887    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
2888                        &glock.start, &glock.length, &glock.proc_id,
2889                        &glock.client_id);
2890    if (err < 0) {
2891        goto out_nofid;
2892    }
2893    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
2894                       glock.type, glock.start, glock.length);
2895
2896    fidp = get_fid(pdu, fid);
2897    if (fidp == NULL) {
2898        err = -ENOENT;
2899        goto out_nofid;
2900    }
2901    err = v9fs_co_fstat(pdu, fidp, &stbuf);
2902    if (err < 0) {
2903        goto out;
2904    }
2905    glock.type = P9_LOCK_TYPE_UNLCK;
2906    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
2907                          glock.start, glock.length, glock.proc_id,
2908                          &glock.client_id);
2909    if (err < 0) {
2910        goto out;
2911    }
2912    err += offset;
2913    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
2914                              glock.length, glock.proc_id);
2915out:
2916    put_fid(pdu, fidp);
2917out_nofid:
2918    complete_pdu(s, pdu, err);
2919    v9fs_string_free(&glock.client_id);
2920}
2921
2922static void v9fs_mkdir(void *opaque)
2923{
2924    V9fsPDU *pdu = opaque;
2925    size_t offset = 7;
2926    int32_t fid;
2927    struct stat stbuf;
2928    V9fsQID qid;
2929    V9fsString name;
2930    V9fsFidState *fidp;
2931    gid_t gid;
2932    int mode;
2933    int err = 0;
2934
2935    v9fs_string_init(&name);
2936    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
2937    if (err < 0) {
2938        goto out_nofid;
2939    }
2940    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
2941
2942    fidp = get_fid(pdu, fid);
2943    if (fidp == NULL) {
2944        err = -ENOENT;
2945        goto out_nofid;
2946    }
2947    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
2948    if (err < 0) {
2949        goto out;
2950    }
2951    stat_to_qid(&stbuf, &qid);
2952    err = pdu_marshal(pdu, offset, "Q", &qid);
2953    if (err < 0) {
2954        goto out;
2955    }
2956    err += offset;
2957    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
2958                            qid.type, qid.version, qid.path, err);
2959out:
2960    put_fid(pdu, fidp);
2961out_nofid:
2962    complete_pdu(pdu->s, pdu, err);
2963    v9fs_string_free(&name);
2964}
2965
2966static void v9fs_xattrwalk(void *opaque)
2967{
2968    int64_t size;
2969    V9fsString name;
2970    ssize_t err = 0;
2971    size_t offset = 7;
2972    int32_t fid, newfid;
2973    V9fsFidState *file_fidp;
2974    V9fsFidState *xattr_fidp = NULL;
2975    V9fsPDU *pdu = opaque;
2976    V9fsState *s = pdu->s;
2977
2978    v9fs_string_init(&name);
2979    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
2980    if (err < 0) {
2981        goto out_nofid;
2982    }
2983    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
2984
2985    file_fidp = get_fid(pdu, fid);
2986    if (file_fidp == NULL) {
2987        err = -ENOENT;
2988        goto out_nofid;
2989    }
2990    xattr_fidp = alloc_fid(s, newfid);
2991    if (xattr_fidp == NULL) {
2992        err = -EINVAL;
2993        goto out;
2994    }
2995    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
2996    if (name.data == NULL) {
2997        /*
2998         * listxattr request. Get the size first
2999         */
3000        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3001        if (size < 0) {
3002            err = size;
3003            clunk_fid(s, xattr_fidp->fid);
3004            goto out;
3005        }
3006        /*
3007         * Read the xattr value
3008         */
3009        xattr_fidp->fs.xattr.len = size;
3010        xattr_fidp->fid_type = P9_FID_XATTR;
3011        xattr_fidp->fs.xattr.copied_len = -1;
3012        if (size) {
3013            xattr_fidp->fs.xattr.value = g_malloc(size);
3014            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3015                                     xattr_fidp->fs.xattr.value,
3016                                     xattr_fidp->fs.xattr.len);
3017            if (err < 0) {
3018                clunk_fid(s, xattr_fidp->fid);
3019                goto out;
3020            }
3021        }
3022        err = pdu_marshal(pdu, offset, "q", size);
3023        if (err < 0) {
3024            goto out;
3025        }
3026        err += offset;
3027    } else {
3028        /*
3029         * specific xattr fid. We check for xattr
3030         * presence also collect the xattr size
3031         */
3032        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3033                                 &name, NULL, 0);
3034        if (size < 0) {
3035            err = size;
3036            clunk_fid(s, xattr_fidp->fid);
3037            goto out;
3038        }
3039        /*
3040         * Read the xattr value
3041         */
3042        xattr_fidp->fs.xattr.len = size;
3043        xattr_fidp->fid_type = P9_FID_XATTR;
3044        xattr_fidp->fs.xattr.copied_len = -1;
3045        if (size) {
3046            xattr_fidp->fs.xattr.value = g_malloc(size);
3047            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3048                                    &name, xattr_fidp->fs.xattr.value,
3049                                    xattr_fidp->fs.xattr.len);
3050            if (err < 0) {
3051                clunk_fid(s, xattr_fidp->fid);
3052                goto out;
3053            }
3054        }
3055        err = pdu_marshal(pdu, offset, "q", size);
3056        if (err < 0) {
3057            goto out;
3058        }
3059        err += offset;
3060    }
3061    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3062out:
3063    put_fid(pdu, file_fidp);
3064    if (xattr_fidp) {
3065        put_fid(pdu, xattr_fidp);
3066    }
3067out_nofid:
3068    complete_pdu(s, pdu, err);
3069    v9fs_string_free(&name);
3070}
3071
3072static void v9fs_xattrcreate(void *opaque)
3073{
3074    int flags;
3075    int32_t fid;
3076    int64_t size;
3077    ssize_t err = 0;
3078    V9fsString name;
3079    size_t offset = 7;
3080    V9fsFidState *file_fidp;
3081    V9fsFidState *xattr_fidp;
3082    V9fsPDU *pdu = opaque;
3083    V9fsState *s = pdu->s;
3084
3085    v9fs_string_init(&name);
3086    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3087    if (err < 0) {
3088        goto out_nofid;
3089    }
3090    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3091
3092    file_fidp = get_fid(pdu, fid);
3093    if (file_fidp == NULL) {
3094        err = -EINVAL;
3095        goto out_nofid;
3096    }
3097    /* Make the file fid point to xattr */
3098    xattr_fidp = file_fidp;
3099    xattr_fidp->fid_type = P9_FID_XATTR;
3100    xattr_fidp->fs.xattr.copied_len = 0;
3101    xattr_fidp->fs.xattr.len = size;
3102    xattr_fidp->fs.xattr.flags = flags;
3103    v9fs_string_init(&xattr_fidp->fs.xattr.name);
3104    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3105    xattr_fidp->fs.xattr.value = g_malloc(size);
3106    err = offset;
3107    put_fid(pdu, file_fidp);
3108out_nofid:
3109    complete_pdu(s, pdu, err);
3110    v9fs_string_free(&name);
3111}
3112
3113static void v9fs_readlink(void *opaque)
3114{
3115    V9fsPDU *pdu = opaque;
3116    size_t offset = 7;
3117    V9fsString target;
3118    int32_t fid;
3119    int err = 0;
3120    V9fsFidState *fidp;
3121
3122    err = pdu_unmarshal(pdu, offset, "d", &fid);
3123    if (err < 0) {
3124        goto out_nofid;
3125    }
3126    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3127    fidp = get_fid(pdu, fid);
3128    if (fidp == NULL) {
3129        err = -ENOENT;
3130        goto out_nofid;
3131    }
3132
3133    v9fs_string_init(&target);
3134    err = v9fs_co_readlink(pdu, &fidp->path, &target);
3135    if (err < 0) {
3136        goto out;
3137    }
3138    err = pdu_marshal(pdu, offset, "s", &target);
3139    if (err < 0) {
3140        v9fs_string_free(&target);
3141        goto out;
3142    }
3143    err += offset;
3144    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3145    v9fs_string_free(&target);
3146out:
3147    put_fid(pdu, fidp);
3148out_nofid:
3149    complete_pdu(pdu->s, pdu, err);
3150}
3151
3152static CoroutineEntry *pdu_co_handlers[] = {
3153    [P9_TREADDIR] = v9fs_readdir,
3154    [P9_TSTATFS] = v9fs_statfs,
3155    [P9_TGETATTR] = v9fs_getattr,
3156    [P9_TSETATTR] = v9fs_setattr,
3157    [P9_TXATTRWALK] = v9fs_xattrwalk,
3158    [P9_TXATTRCREATE] = v9fs_xattrcreate,
3159    [P9_TMKNOD] = v9fs_mknod,
3160    [P9_TRENAME] = v9fs_rename,
3161    [P9_TLOCK] = v9fs_lock,
3162    [P9_TGETLOCK] = v9fs_getlock,
3163    [P9_TRENAMEAT] = v9fs_renameat,
3164    [P9_TREADLINK] = v9fs_readlink,
3165    [P9_TUNLINKAT] = v9fs_unlinkat,
3166    [P9_TMKDIR] = v9fs_mkdir,
3167    [P9_TVERSION] = v9fs_version,
3168    [P9_TLOPEN] = v9fs_open,
3169    [P9_TATTACH] = v9fs_attach,
3170    [P9_TSTAT] = v9fs_stat,
3171    [P9_TWALK] = v9fs_walk,
3172    [P9_TCLUNK] = v9fs_clunk,
3173    [P9_TFSYNC] = v9fs_fsync,
3174    [P9_TOPEN] = v9fs_open,
3175    [P9_TREAD] = v9fs_read,
3176#if 0
3177    [P9_TAUTH] = v9fs_auth,
3178#endif
3179    [P9_TFLUSH] = v9fs_flush,
3180    [P9_TLINK] = v9fs_link,
3181    [P9_TSYMLINK] = v9fs_symlink,
3182    [P9_TCREATE] = v9fs_create,
3183    [P9_TLCREATE] = v9fs_lcreate,
3184    [P9_TWRITE] = v9fs_write,
3185    [P9_TWSTAT] = v9fs_wstat,
3186    [P9_TREMOVE] = v9fs_remove,
3187};
3188
3189static void v9fs_op_not_supp(void *opaque)
3190{
3191    V9fsPDU *pdu = opaque;
3192    complete_pdu(pdu->s, pdu, -EOPNOTSUPP);
3193}
3194
3195static void v9fs_fs_ro(void *opaque)
3196{
3197    V9fsPDU *pdu = opaque;
3198    complete_pdu(pdu->s, pdu, -EROFS);
3199}
3200
3201static inline bool is_read_only_op(V9fsPDU *pdu)
3202{
3203    switch (pdu->id) {
3204    case P9_TREADDIR:
3205    case P9_TSTATFS:
3206    case P9_TGETATTR:
3207    case P9_TXATTRWALK:
3208    case P9_TLOCK:
3209    case P9_TGETLOCK:
3210    case P9_TREADLINK:
3211    case P9_TVERSION:
3212    case P9_TLOPEN:
3213    case P9_TATTACH:
3214    case P9_TSTAT:
3215    case P9_TWALK:
3216    case P9_TCLUNK:
3217    case P9_TFSYNC:
3218    case P9_TOPEN:
3219    case P9_TREAD:
3220    case P9_TAUTH:
3221    case P9_TFLUSH:
3222        return 1;
3223    default:
3224        return 0;
3225    }
3226}
3227
3228static void submit_pdu(V9fsState *s, V9fsPDU *pdu)
3229{
3230    Coroutine *co;
3231    CoroutineEntry *handler;
3232
3233    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
3234        (pdu_co_handlers[pdu->id] == NULL)) {
3235        handler = v9fs_op_not_supp;
3236    } else {
3237        handler = pdu_co_handlers[pdu->id];
3238    }
3239
3240    if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
3241        handler = v9fs_fs_ro;
3242    }
3243    co = qemu_coroutine_create(handler);
3244    qemu_coroutine_enter(co, pdu);
3245}
3246
3247void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
3248{
3249    V9fsState *s = (V9fsState *)vdev;
3250    V9fsPDU *pdu;
3251    ssize_t len;
3252
3253    while ((pdu = alloc_pdu(s)) &&
3254            (len = virtqueue_pop(vq, &pdu->elem)) != 0) {
3255        uint8_t *ptr;
3256        pdu->s = s;
3257        BUG_ON(pdu->elem.out_num == 0 || pdu->elem.in_num == 0);
3258        BUG_ON(pdu->elem.out_sg[0].iov_len < 7);
3259
3260        ptr = pdu->elem.out_sg[0].iov_base;
3261
3262        pdu->size = le32_to_cpu(*(uint32_t *)ptr);
3263        pdu->id = ptr[4];
3264        pdu->tag = le16_to_cpu(*(uint16_t *)(ptr + 5));
3265        qemu_co_queue_init(&pdu->complete);
3266        submit_pdu(s, pdu);
3267    }
3268    free_pdu(s, pdu);
3269}
3270
3271static void __attribute__((__constructor__)) virtio_9p_set_fd_limit(void)
3272{
3273    struct rlimit rlim;
3274    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
3275        fprintf(stderr, "Failed to get the resource limit\n");
3276        exit(1);
3277    }
3278    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
3279    open_fd_rc = rlim.rlim_cur/2;
3280}
3281