linux/samples/mic/mpssd/mpssd.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Intel MIC Platform Software Stack (MPSS)
   4 *
   5 * Copyright(c) 2013 Intel Corporation.
   6 *
   7 * Intel MIC User Space Tools.
   8 */
   9
  10#define _GNU_SOURCE
  11
  12#include <stdlib.h>
  13#include <fcntl.h>
  14#include <getopt.h>
  15#include <assert.h>
  16#include <unistd.h>
  17#include <stdbool.h>
  18#include <signal.h>
  19#include <poll.h>
  20#include <features.h>
  21#include <sys/types.h>
  22#include <sys/stat.h>
  23#include <sys/mman.h>
  24#include <sys/socket.h>
  25#include <linux/virtio_ring.h>
  26#include <linux/virtio_net.h>
  27#include <linux/virtio_console.h>
  28#include <linux/virtio_blk.h>
  29#include <linux/version.h>
  30#include "mpssd.h"
  31#include <linux/mic_ioctl.h>
  32#include <linux/mic_common.h>
  33#include <tools/endian.h>
  34
  35static void *init_mic(void *arg);
  36
  37static FILE *logfp;
  38static struct mic_info mic_list;
  39
  40#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  41
  42#define min_t(type, x, y) ({                            \
  43                type __min1 = (x);                      \
  44                type __min2 = (y);                      \
  45                __min1 < __min2 ? __min1 : __min2; })
  46
  47/* align addr on a size boundary - adjust address up/down if needed */
  48#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
  49#define _ALIGN_UP(addr, size)    _ALIGN_DOWN(addr + size - 1, size)
  50
  51/* align addr on a size boundary - adjust address up if needed */
  52#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
  53
  54/* to align the pointer to the (next) page boundary */
  55#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
  56
  57#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
  58
  59#define GSO_ENABLED             1
  60#define MAX_GSO_SIZE            (64 * 1024)
  61#define ETH_H_LEN               14
  62#define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  63#define MIC_DEVICE_PAGE_END     0x1000
  64
  65#ifndef VIRTIO_NET_HDR_F_DATA_VALID
  66#define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
  67#endif
  68
  69static struct {
  70        struct mic_device_desc dd;
  71        struct mic_vqconfig vqconfig[2];
  72        __u32 host_features, guest_acknowledgements;
  73        struct virtio_console_config cons_config;
  74} virtcons_dev_page = {
  75        .dd = {
  76                .type = VIRTIO_ID_CONSOLE,
  77                .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  78                .feature_len = sizeof(virtcons_dev_page.host_features),
  79                .config_len = sizeof(virtcons_dev_page.cons_config),
  80        },
  81        .vqconfig[0] = {
  82                .num = htole16(MIC_VRING_ENTRIES),
  83        },
  84        .vqconfig[1] = {
  85                .num = htole16(MIC_VRING_ENTRIES),
  86        },
  87};
  88
  89static struct {
  90        struct mic_device_desc dd;
  91        struct mic_vqconfig vqconfig[2];
  92        __u32 host_features, guest_acknowledgements;
  93        struct virtio_net_config net_config;
  94} virtnet_dev_page = {
  95        .dd = {
  96                .type = VIRTIO_ID_NET,
  97                .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
  98                .feature_len = sizeof(virtnet_dev_page.host_features),
  99                .config_len = sizeof(virtnet_dev_page.net_config),
 100        },
 101        .vqconfig[0] = {
 102                .num = htole16(MIC_VRING_ENTRIES),
 103        },
 104        .vqconfig[1] = {
 105                .num = htole16(MIC_VRING_ENTRIES),
 106        },
 107#if GSO_ENABLED
 108        .host_features = htole32(
 109                1 << VIRTIO_NET_F_CSUM |
 110                1 << VIRTIO_NET_F_GSO |
 111                1 << VIRTIO_NET_F_GUEST_TSO4 |
 112                1 << VIRTIO_NET_F_GUEST_TSO6 |
 113                1 << VIRTIO_NET_F_GUEST_ECN),
 114#else
 115                .host_features = 0,
 116#endif
 117};
 118
 119static const char *mic_config_dir = "/etc/mpss";
 120static const char *virtblk_backend = "VIRTBLK_BACKEND";
 121static struct {
 122        struct mic_device_desc dd;
 123        struct mic_vqconfig vqconfig[1];
 124        __u32 host_features, guest_acknowledgements;
 125        struct virtio_blk_config blk_config;
 126} virtblk_dev_page = {
 127        .dd = {
 128                .type = VIRTIO_ID_BLOCK,
 129                .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
 130                .feature_len = sizeof(virtblk_dev_page.host_features),
 131                .config_len = sizeof(virtblk_dev_page.blk_config),
 132        },
 133        .vqconfig[0] = {
 134                .num = htole16(MIC_VRING_ENTRIES),
 135        },
 136        .host_features =
 137                htole32(1<<VIRTIO_BLK_F_SEG_MAX),
 138        .blk_config = {
 139                .seg_max = htole32(MIC_VRING_ENTRIES - 2),
 140                .capacity = htole64(0),
 141         }
 142};
 143
 144static char *myname;
 145
 146static int
 147tap_configure(struct mic_info *mic, char *dev)
 148{
 149        pid_t pid;
 150        char *ifargv[7];
 151        char ipaddr[IFNAMSIZ];
 152        int ret = 0;
 153
 154        pid = fork();
 155        if (pid == 0) {
 156                ifargv[0] = "ip";
 157                ifargv[1] = "link";
 158                ifargv[2] = "set";
 159                ifargv[3] = dev;
 160                ifargv[4] = "up";
 161                ifargv[5] = NULL;
 162                mpsslog("Configuring %s\n", dev);
 163                ret = execvp("ip", ifargv);
 164                if (ret < 0) {
 165                        mpsslog("%s execvp failed errno %s\n",
 166                                mic->name, strerror(errno));
 167                        return ret;
 168                }
 169        }
 170        if (pid < 0) {
 171                mpsslog("%s fork failed errno %s\n",
 172                        mic->name, strerror(errno));
 173                return ret;
 174        }
 175
 176        ret = waitpid(pid, NULL, 0);
 177        if (ret < 0) {
 178                mpsslog("%s waitpid failed errno %s\n",
 179                        mic->name, strerror(errno));
 180                return ret;
 181        }
 182
 183        snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
 184
 185        pid = fork();
 186        if (pid == 0) {
 187                ifargv[0] = "ip";
 188                ifargv[1] = "addr";
 189                ifargv[2] = "add";
 190                ifargv[3] = ipaddr;
 191                ifargv[4] = "dev";
 192                ifargv[5] = dev;
 193                ifargv[6] = NULL;
 194                mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
 195                ret = execvp("ip", ifargv);
 196                if (ret < 0) {
 197                        mpsslog("%s execvp failed errno %s\n",
 198                                mic->name, strerror(errno));
 199                        return ret;
 200                }
 201        }
 202        if (pid < 0) {
 203                mpsslog("%s fork failed errno %s\n",
 204                        mic->name, strerror(errno));
 205                return ret;
 206        }
 207
 208        ret = waitpid(pid, NULL, 0);
 209        if (ret < 0) {
 210                mpsslog("%s waitpid failed errno %s\n",
 211                        mic->name, strerror(errno));
 212                return ret;
 213        }
 214        mpsslog("MIC name %s %s %d DONE!\n",
 215                mic->name, __func__, __LINE__);
 216        return 0;
 217}
 218
 219static int tun_alloc(struct mic_info *mic, char *dev)
 220{
 221        struct ifreq ifr;
 222        int fd, err;
 223#if GSO_ENABLED
 224        unsigned offload;
 225#endif
 226        fd = open("/dev/net/tun", O_RDWR);
 227        if (fd < 0) {
 228                mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
 229                goto done;
 230        }
 231
 232        memset(&ifr, 0, sizeof(ifr));
 233
 234        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 235        if (*dev)
 236                strncpy(ifr.ifr_name, dev, IFNAMSIZ);
 237
 238        err = ioctl(fd, TUNSETIFF, (void *)&ifr);
 239        if (err < 0) {
 240                mpsslog("%s %s %d TUNSETIFF failed %s\n",
 241                        mic->name, __func__, __LINE__, strerror(errno));
 242                close(fd);
 243                return err;
 244        }
 245#if GSO_ENABLED
 246        offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
 247
 248        err = ioctl(fd, TUNSETOFFLOAD, offload);
 249        if (err < 0) {
 250                mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
 251                        mic->name, __func__, __LINE__, strerror(errno));
 252                close(fd);
 253                return err;
 254        }
 255#endif
 256        strcpy(dev, ifr.ifr_name);
 257        mpsslog("Created TAP %s\n", dev);
 258done:
 259        return fd;
 260}
 261
 262#define NET_FD_VIRTIO_NET 0
 263#define NET_FD_TUN 1
 264#define MAX_NET_FD 2
 265
 266static void set_dp(struct mic_info *mic, int type, void *dp)
 267{
 268        switch (type) {
 269        case VIRTIO_ID_CONSOLE:
 270                mic->mic_console.console_dp = dp;
 271                return;
 272        case VIRTIO_ID_NET:
 273                mic->mic_net.net_dp = dp;
 274                return;
 275        case VIRTIO_ID_BLOCK:
 276                mic->mic_virtblk.block_dp = dp;
 277                return;
 278        }
 279        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 280        assert(0);
 281}
 282
 283static void *get_dp(struct mic_info *mic, int type)
 284{
 285        switch (type) {
 286        case VIRTIO_ID_CONSOLE:
 287                return mic->mic_console.console_dp;
 288        case VIRTIO_ID_NET:
 289                return mic->mic_net.net_dp;
 290        case VIRTIO_ID_BLOCK:
 291                return mic->mic_virtblk.block_dp;
 292        }
 293        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 294        assert(0);
 295        return NULL;
 296}
 297
 298static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
 299{
 300        struct mic_device_desc *d;
 301        int i;
 302        void *dp = get_dp(mic, type);
 303
 304        for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
 305                i += mic_total_desc_size(d)) {
 306                d = dp + i;
 307
 308                /* End of list */
 309                if (d->type == 0)
 310                        break;
 311
 312                if (d->type == -1)
 313                        continue;
 314
 315                mpsslog("%s %s d-> type %d d %p\n",
 316                        mic->name, __func__, d->type, d);
 317
 318                if (d->type == (__u8)type)
 319                        return d;
 320        }
 321        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 322        return NULL;
 323}
 324
 325/* See comments in vhost.c for explanation of next_desc() */
 326static unsigned next_desc(struct vring_desc *desc)
 327{
 328        unsigned int next;
 329
 330        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
 331                return -1U;
 332        next = le16toh(desc->next);
 333        return next;
 334}
 335
 336/* Sum up all the IOVEC length */
 337static ssize_t
 338sum_iovec_len(struct mic_copy_desc *copy)
 339{
 340        ssize_t sum = 0;
 341        unsigned int i;
 342
 343        for (i = 0; i < copy->iovcnt; i++)
 344                sum += copy->iov[i].iov_len;
 345        return sum;
 346}
 347
 348static inline void verify_out_len(struct mic_info *mic,
 349        struct mic_copy_desc *copy)
 350{
 351        if (copy->out_len != sum_iovec_len(copy)) {
 352                mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
 353                        mic->name, __func__, __LINE__,
 354                        copy->out_len, sum_iovec_len(copy));
 355                assert(copy->out_len == sum_iovec_len(copy));
 356        }
 357}
 358
 359/* Display an iovec */
 360static void
 361disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
 362           const char *s, int line)
 363{
 364        unsigned int i;
 365
 366        for (i = 0; i < copy->iovcnt; i++)
 367                mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
 368                        mic->name, s, line, i,
 369                        copy->iov[i].iov_base, copy->iov[i].iov_len);
 370}
 371
 372static inline __u16 read_avail_idx(struct mic_vring *vr)
 373{
 374        return READ_ONCE(vr->info->avail_idx);
 375}
 376
 377static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
 378                                struct mic_copy_desc *copy, ssize_t len)
 379{
 380        copy->vr_idx = tx ? 0 : 1;
 381        copy->update_used = true;
 382        if (type == VIRTIO_ID_NET)
 383                copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
 384        else
 385                copy->iov[0].iov_len = len;
 386}
 387
 388/* Central API which triggers the copies */
 389static int
 390mic_virtio_copy(struct mic_info *mic, int fd,
 391                struct mic_vring *vr, struct mic_copy_desc *copy)
 392{
 393        int ret;
 394
 395        ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
 396        if (ret) {
 397                mpsslog("%s %s %d errno %s ret %d\n",
 398                        mic->name, __func__, __LINE__,
 399                        strerror(errno), ret);
 400        }
 401        return ret;
 402}
 403
 404static inline unsigned _vring_size(unsigned int num, unsigned long align)
 405{
 406        return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
 407                                + align - 1) & ~(align - 1))
 408                + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
 409}
 410
 411/*
 412 * This initialization routine requires at least one
 413 * vring i.e. vr0. vr1 is optional.
 414 */
 415static void *
 416init_vr(struct mic_info *mic, int fd, int type,
 417        struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
 418{
 419        int vr_size;
 420        char *va;
 421
 422        vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
 423                                         MIC_VIRTIO_RING_ALIGN) +
 424                             sizeof(struct _mic_vring_info));
 425        va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
 426                PROT_READ, MAP_SHARED, fd, 0);
 427        if (MAP_FAILED == va) {
 428                mpsslog("%s %s %d mmap failed errno %s\n",
 429                        mic->name, __func__, __LINE__,
 430                        strerror(errno));
 431                goto done;
 432        }
 433        set_dp(mic, type, va);
 434        vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
 435        vr0->info = vr0->va +
 436                _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
 437        vring_init(&vr0->vr,
 438                   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
 439        mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
 440                __func__, mic->name, vr0->va, vr0->info, vr_size,
 441                _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 442        mpsslog("magic 0x%x expected 0x%x\n",
 443                le32toh(vr0->info->magic), MIC_MAGIC + type);
 444        assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
 445        if (vr1) {
 446                vr1->va = (struct mic_vring *)
 447                        &va[MIC_DEVICE_PAGE_END + vr_size];
 448                vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
 449                        MIC_VIRTIO_RING_ALIGN);
 450                vring_init(&vr1->vr,
 451                           MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
 452                mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
 453                        __func__, mic->name, vr1->va, vr1->info, vr_size,
 454                        _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 455                mpsslog("magic 0x%x expected 0x%x\n",
 456                        le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
 457                assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
 458        }
 459done:
 460        return va;
 461}
 462
 463static int
 464wait_for_card_driver(struct mic_info *mic, int fd, int type)
 465{
 466        struct pollfd pollfd;
 467        int err;
 468        struct mic_device_desc *desc = get_device_desc(mic, type);
 469        __u8 prev_status;
 470
 471        if (!desc)
 472                return -ENODEV;
 473        prev_status = desc->status;
 474        pollfd.fd = fd;
 475        mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
 476                mic->name, __func__, type, desc->status);
 477
 478        while (1) {
 479                pollfd.events = POLLIN;
 480                pollfd.revents = 0;
 481                err = poll(&pollfd, 1, -1);
 482                if (err < 0) {
 483                        mpsslog("%s %s poll failed %s\n",
 484                                mic->name, __func__, strerror(errno));
 485                        continue;
 486                }
 487
 488                if (pollfd.revents) {
 489                        if (desc->status != prev_status) {
 490                                mpsslog("%s %s Waiting... desc-> type %d "
 491                                        "status 0x%x\n",
 492                                        mic->name, __func__, type,
 493                                        desc->status);
 494                                prev_status = desc->status;
 495                        }
 496                        if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
 497                                mpsslog("%s %s poll.revents %d\n",
 498                                        mic->name, __func__, pollfd.revents);
 499                                mpsslog("%s %s desc-> type %d status 0x%x\n",
 500                                        mic->name, __func__, type,
 501                                        desc->status);
 502                                break;
 503                        }
 504                }
 505        }
 506        return 0;
 507}
 508
 509/* Spin till we have some descriptors */
 510static void
 511spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
 512{
 513        __u16 avail_idx = read_avail_idx(vr);
 514
 515        while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
 516#ifdef DEBUG
 517                mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
 518                        mic->name, __func__,
 519                        le16toh(vr->vr.avail->idx), vr->info->avail_idx);
 520#endif
 521                sched_yield();
 522        }
 523}
 524
 525static void *
 526virtio_net(void *arg)
 527{
 528        static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
 529        static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
 530        struct iovec vnet_iov[2][2] = {
 531                { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
 532                  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
 533                { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
 534                  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
 535        };
 536        struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
 537        struct mic_info *mic = (struct mic_info *)arg;
 538        char if_name[IFNAMSIZ];
 539        struct pollfd net_poll[MAX_NET_FD];
 540        struct mic_vring tx_vr, rx_vr;
 541        struct mic_copy_desc copy;
 542        struct mic_device_desc *desc;
 543        int err;
 544
 545        snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
 546        mic->mic_net.tap_fd = tun_alloc(mic, if_name);
 547        if (mic->mic_net.tap_fd < 0)
 548                goto done;
 549
 550        if (tap_configure(mic, if_name))
 551                goto done;
 552        mpsslog("MIC name %s id %d\n", mic->name, mic->id);
 553
 554        net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
 555        net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
 556        net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
 557        net_poll[NET_FD_TUN].events = POLLIN;
 558
 559        if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
 560                                  VIRTIO_ID_NET, &tx_vr, &rx_vr,
 561                virtnet_dev_page.dd.num_vq)) {
 562                mpsslog("%s init_vr failed %s\n",
 563                        mic->name, strerror(errno));
 564                goto done;
 565        }
 566
 567        copy.iovcnt = 2;
 568        desc = get_device_desc(mic, VIRTIO_ID_NET);
 569
 570        while (1) {
 571                ssize_t len;
 572
 573                net_poll[NET_FD_VIRTIO_NET].revents = 0;
 574                net_poll[NET_FD_TUN].revents = 0;
 575
 576                /* Start polling for data from tap and virtio net */
 577                err = poll(net_poll, 2, -1);
 578                if (err < 0) {
 579                        mpsslog("%s poll failed %s\n",
 580                                __func__, strerror(errno));
 581                        continue;
 582                }
 583                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 584                        err = wait_for_card_driver(mic,
 585                                                   mic->mic_net.virtio_net_fd,
 586                                                   VIRTIO_ID_NET);
 587                        if (err) {
 588                                mpsslog("%s %s %d Exiting...\n",
 589                                        mic->name, __func__, __LINE__);
 590                                break;
 591                        }
 592                }
 593                /*
 594                 * Check if there is data to be read from TUN and write to
 595                 * virtio net fd if there is.
 596                 */
 597                if (net_poll[NET_FD_TUN].revents & POLLIN) {
 598                        copy.iov = iov0;
 599                        len = readv(net_poll[NET_FD_TUN].fd,
 600                                copy.iov, copy.iovcnt);
 601                        if (len > 0) {
 602                                struct virtio_net_hdr *hdr
 603                                        = (struct virtio_net_hdr *)vnet_hdr[0];
 604
 605                                /* Disable checksums on the card since we are on
 606                                   a reliable PCIe link */
 607                                hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
 608#ifdef DEBUG
 609                                mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
 610                                        __func__, __LINE__, hdr->flags);
 611                                mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
 612                                        copy.out_len, hdr->gso_type);
 613#endif
 614#ifdef DEBUG
 615                                disp_iovec(mic, copy, __func__, __LINE__);
 616                                mpsslog("%s %s %d read from tap 0x%lx\n",
 617                                        mic->name, __func__, __LINE__,
 618                                        len);
 619#endif
 620                                spin_for_descriptors(mic, &tx_vr);
 621                                txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
 622                                             len);
 623
 624                                err = mic_virtio_copy(mic,
 625                                        mic->mic_net.virtio_net_fd, &tx_vr,
 626                                        &copy);
 627                                if (err < 0) {
 628                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 629                                                mic->name, __func__, __LINE__,
 630                                                strerror(errno));
 631                                }
 632                                if (!err)
 633                                        verify_out_len(mic, &copy);
 634#ifdef DEBUG
 635                                disp_iovec(mic, copy, __func__, __LINE__);
 636                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 637                                        mic->name, __func__, __LINE__,
 638                                        sum_iovec_len(&copy));
 639#endif
 640                                /* Reinitialize IOV for next run */
 641                                iov0[1].iov_len = MAX_NET_PKT_SIZE;
 642                        } else if (len < 0) {
 643                                disp_iovec(mic, &copy, __func__, __LINE__);
 644                                mpsslog("%s %s %d read failed %s ", mic->name,
 645                                        __func__, __LINE__, strerror(errno));
 646                                mpsslog("cnt %d sum %zd\n",
 647                                        copy.iovcnt, sum_iovec_len(&copy));
 648                        }
 649                }
 650
 651                /*
 652                 * Check if there is data to be read from virtio net and
 653                 * write to TUN if there is.
 654                 */
 655                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
 656                        while (rx_vr.info->avail_idx !=
 657                                le16toh(rx_vr.vr.avail->idx)) {
 658                                copy.iov = iov1;
 659                                txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
 660                                             MAX_NET_PKT_SIZE
 661                                        + sizeof(struct virtio_net_hdr));
 662
 663                                err = mic_virtio_copy(mic,
 664                                        mic->mic_net.virtio_net_fd, &rx_vr,
 665                                        &copy);
 666                                if (!err) {
 667#ifdef DEBUG
 668                                        struct virtio_net_hdr *hdr
 669                                                = (struct virtio_net_hdr *)
 670                                                        vnet_hdr[1];
 671
 672                                        mpsslog("%s %s %d hdr->flags 0x%x, ",
 673                                                mic->name, __func__, __LINE__,
 674                                                hdr->flags);
 675                                        mpsslog("out_len %d gso_type 0x%x\n",
 676                                                copy.out_len,
 677                                                hdr->gso_type);
 678#endif
 679                                        /* Set the correct output iov_len */
 680                                        iov1[1].iov_len = copy.out_len -
 681                                                sizeof(struct virtio_net_hdr);
 682                                        verify_out_len(mic, &copy);
 683#ifdef DEBUG
 684                                        disp_iovec(mic, copy, __func__,
 685                                                   __LINE__);
 686                                        mpsslog("%s %s %d ",
 687                                                mic->name, __func__, __LINE__);
 688                                        mpsslog("read from net 0x%lx\n",
 689                                                sum_iovec_len(copy));
 690#endif
 691                                        len = writev(net_poll[NET_FD_TUN].fd,
 692                                                copy.iov, copy.iovcnt);
 693                                        if (len != sum_iovec_len(&copy)) {
 694                                                mpsslog("Tun write failed %s ",
 695                                                        strerror(errno));
 696                                                mpsslog("len 0x%zx ", len);
 697                                                mpsslog("read_len 0x%zx\n",
 698                                                        sum_iovec_len(&copy));
 699                                        } else {
 700#ifdef DEBUG
 701                                                disp_iovec(mic, &copy, __func__,
 702                                                           __LINE__);
 703                                                mpsslog("%s %s %d ",
 704                                                        mic->name, __func__,
 705                                                        __LINE__);
 706                                                mpsslog("wrote to tap 0x%lx\n",
 707                                                        len);
 708#endif
 709                                        }
 710                                } else {
 711                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 712                                                mic->name, __func__, __LINE__,
 713                                                strerror(errno));
 714                                        break;
 715                                }
 716                        }
 717                }
 718                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 719                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 720        }
 721done:
 722        pthread_exit(NULL);
 723}
 724
 725/* virtio_console */
 726#define VIRTIO_CONSOLE_FD 0
 727#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
 728#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
 729#define MAX_BUFFER_SIZE PAGE_SIZE
 730
 731static void *
 732virtio_console(void *arg)
 733{
 734        static __u8 vcons_buf[2][PAGE_SIZE];
 735        struct iovec vcons_iov[2] = {
 736                { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
 737                { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
 738        };
 739        struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
 740        struct mic_info *mic = (struct mic_info *)arg;
 741        int err;
 742        struct pollfd console_poll[MAX_CONSOLE_FD];
 743        int pty_fd;
 744        char *pts_name;
 745        ssize_t len;
 746        struct mic_vring tx_vr, rx_vr;
 747        struct mic_copy_desc copy;
 748        struct mic_device_desc *desc;
 749
 750        pty_fd = posix_openpt(O_RDWR);
 751        if (pty_fd < 0) {
 752                mpsslog("can't open a pseudoterminal master device: %s\n",
 753                        strerror(errno));
 754                goto _return;
 755        }
 756        pts_name = ptsname(pty_fd);
 757        if (pts_name == NULL) {
 758                mpsslog("can't get pts name\n");
 759                goto _close_pty;
 760        }
 761        printf("%s console message goes to %s\n", mic->name, pts_name);
 762        mpsslog("%s console message goes to %s\n", mic->name, pts_name);
 763        err = grantpt(pty_fd);
 764        if (err < 0) {
 765                mpsslog("can't grant access: %s %s\n",
 766                        pts_name, strerror(errno));
 767                goto _close_pty;
 768        }
 769        err = unlockpt(pty_fd);
 770        if (err < 0) {
 771                mpsslog("can't unlock a pseudoterminal: %s %s\n",
 772                        pts_name, strerror(errno));
 773                goto _close_pty;
 774        }
 775        console_poll[MONITOR_FD].fd = pty_fd;
 776        console_poll[MONITOR_FD].events = POLLIN;
 777
 778        console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
 779        console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
 780
 781        if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
 782                                  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
 783                virtcons_dev_page.dd.num_vq)) {
 784                mpsslog("%s init_vr failed %s\n",
 785                        mic->name, strerror(errno));
 786                goto _close_pty;
 787        }
 788
 789        copy.iovcnt = 1;
 790        desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
 791
 792        for (;;) {
 793                console_poll[MONITOR_FD].revents = 0;
 794                console_poll[VIRTIO_CONSOLE_FD].revents = 0;
 795                err = poll(console_poll, MAX_CONSOLE_FD, -1);
 796                if (err < 0) {
 797                        mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
 798                                strerror(errno));
 799                        continue;
 800                }
 801                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 802                        err = wait_for_card_driver(mic,
 803                                        mic->mic_console.virtio_console_fd,
 804                                        VIRTIO_ID_CONSOLE);
 805                        if (err) {
 806                                mpsslog("%s %s %d Exiting...\n",
 807                                        mic->name, __func__, __LINE__);
 808                                break;
 809                        }
 810                }
 811
 812                if (console_poll[MONITOR_FD].revents & POLLIN) {
 813                        copy.iov = iov0;
 814                        len = readv(pty_fd, copy.iov, copy.iovcnt);
 815                        if (len > 0) {
 816#ifdef DEBUG
 817                                disp_iovec(mic, copy, __func__, __LINE__);
 818                                mpsslog("%s %s %d read from tap 0x%lx\n",
 819                                        mic->name, __func__, __LINE__,
 820                                        len);
 821#endif
 822                                spin_for_descriptors(mic, &tx_vr);
 823                                txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
 824                                             &copy, len);
 825
 826                                err = mic_virtio_copy(mic,
 827                                        mic->mic_console.virtio_console_fd,
 828                                        &tx_vr, &copy);
 829                                if (err < 0) {
 830                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 831                                                mic->name, __func__, __LINE__,
 832                                                strerror(errno));
 833                                }
 834                                if (!err)
 835                                        verify_out_len(mic, &copy);
 836#ifdef DEBUG
 837                                disp_iovec(mic, copy, __func__, __LINE__);
 838                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 839                                        mic->name, __func__, __LINE__,
 840                                        sum_iovec_len(copy));
 841#endif
 842                                /* Reinitialize IOV for next run */
 843                                iov0->iov_len = PAGE_SIZE;
 844                        } else if (len < 0) {
 845                                disp_iovec(mic, &copy, __func__, __LINE__);
 846                                mpsslog("%s %s %d read failed %s ",
 847                                        mic->name, __func__, __LINE__,
 848                                        strerror(errno));
 849                                mpsslog("cnt %d sum %zd\n",
 850                                        copy.iovcnt, sum_iovec_len(&copy));
 851                        }
 852                }
 853
 854                if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
 855                        while (rx_vr.info->avail_idx !=
 856                                le16toh(rx_vr.vr.avail->idx)) {
 857                                copy.iov = iov1;
 858                                txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
 859                                             &copy, PAGE_SIZE);
 860
 861                                err = mic_virtio_copy(mic,
 862                                        mic->mic_console.virtio_console_fd,
 863                                        &rx_vr, &copy);
 864                                if (!err) {
 865                                        /* Set the correct output iov_len */
 866                                        iov1->iov_len = copy.out_len;
 867                                        verify_out_len(mic, &copy);
 868#ifdef DEBUG
 869                                        disp_iovec(mic, copy, __func__,
 870                                                   __LINE__);
 871                                        mpsslog("%s %s %d ",
 872                                                mic->name, __func__, __LINE__);
 873                                        mpsslog("read from net 0x%lx\n",
 874                                                sum_iovec_len(copy));
 875#endif
 876                                        len = writev(pty_fd,
 877                                                copy.iov, copy.iovcnt);
 878                                        if (len != sum_iovec_len(&copy)) {
 879                                                mpsslog("Tun write failed %s ",
 880                                                        strerror(errno));
 881                                                mpsslog("len 0x%zx ", len);
 882                                                mpsslog("read_len 0x%zx\n",
 883                                                        sum_iovec_len(&copy));
 884                                        } else {
 885#ifdef DEBUG
 886                                                disp_iovec(mic, copy, __func__,
 887                                                           __LINE__);
 888                                                mpsslog("%s %s %d ",
 889                                                        mic->name, __func__,
 890                                                        __LINE__);
 891                                                mpsslog("wrote to tap 0x%lx\n",
 892                                                        len);
 893#endif
 894                                        }
 895                                } else {
 896                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 897                                                mic->name, __func__, __LINE__,
 898                                                strerror(errno));
 899                                        break;
 900                                }
 901                        }
 902                }
 903                if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 904                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 905        }
 906_close_pty:
 907        close(pty_fd);
 908_return:
 909        pthread_exit(NULL);
 910}
 911
 912static void
 913add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
 914{
 915        char path[PATH_MAX];
 916        int fd, err;
 917
 918        snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
 919        fd = open(path, O_RDWR);
 920        if (fd < 0) {
 921                mpsslog("Could not open %s %s\n", path, strerror(errno));
 922                return;
 923        }
 924
 925        err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
 926        if (err < 0) {
 927                mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
 928                close(fd);
 929                return;
 930        }
 931        switch (dd->type) {
 932        case VIRTIO_ID_NET:
 933                mic->mic_net.virtio_net_fd = fd;
 934                mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
 935                break;
 936        case VIRTIO_ID_CONSOLE:
 937                mic->mic_console.virtio_console_fd = fd;
 938                mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
 939                break;
 940        case VIRTIO_ID_BLOCK:
 941                mic->mic_virtblk.virtio_block_fd = fd;
 942                mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
 943                break;
 944        }
 945}
 946
 947static bool
 948set_backend_file(struct mic_info *mic)
 949{
 950        FILE *config;
 951        char buff[PATH_MAX], *line, *evv, *p;
 952
 953        snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
 954        config = fopen(buff, "r");
 955        if (config == NULL)
 956                return false;
 957        do {  /* look for "virtblk_backend=XXXX" */
 958                line = fgets(buff, PATH_MAX, config);
 959                if (line == NULL)
 960                        break;
 961                if (*line == '#')
 962                        continue;
 963                p = strchr(line, '\n');
 964                if (p)
 965                        *p = '\0';
 966        } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
 967        fclose(config);
 968        if (line == NULL)
 969                return false;
 970        evv = strchr(line, '=');
 971        if (evv == NULL)
 972                return false;
 973        mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
 974        if (mic->mic_virtblk.backend_file == NULL) {
 975                mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
 976                return false;
 977        }
 978        strcpy(mic->mic_virtblk.backend_file, evv + 1);
 979        return true;
 980}
 981
 982#define SECTOR_SIZE 512
 983static bool
 984set_backend_size(struct mic_info *mic)
 985{
 986        mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
 987                SEEK_END);
 988        if (mic->mic_virtblk.backend_size < 0) {
 989                mpsslog("%s: can't seek: %s\n",
 990                        mic->name, mic->mic_virtblk.backend_file);
 991                return false;
 992        }
 993        virtblk_dev_page.blk_config.capacity =
 994                mic->mic_virtblk.backend_size / SECTOR_SIZE;
 995        if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
 996                virtblk_dev_page.blk_config.capacity++;
 997
 998        virtblk_dev_page.blk_config.capacity =
 999                htole64(virtblk_dev_page.blk_config.capacity);
1000
1001        return true;
1002}
1003
1004static bool
1005open_backend(struct mic_info *mic)
1006{
1007        if (!set_backend_file(mic))
1008                goto _error_exit;
1009        mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1010        if (mic->mic_virtblk.backend < 0) {
1011                mpsslog("%s: can't open: %s\n", mic->name,
1012                        mic->mic_virtblk.backend_file);
1013                goto _error_free;
1014        }
1015        if (!set_backend_size(mic))
1016                goto _error_close;
1017        mic->mic_virtblk.backend_addr = mmap(NULL,
1018                mic->mic_virtblk.backend_size,
1019                PROT_READ|PROT_WRITE, MAP_SHARED,
1020                mic->mic_virtblk.backend, 0L);
1021        if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1022                mpsslog("%s: can't map: %s %s\n",
1023                        mic->name, mic->mic_virtblk.backend_file,
1024                        strerror(errno));
1025                goto _error_close;
1026        }
1027        return true;
1028
1029 _error_close:
1030        close(mic->mic_virtblk.backend);
1031 _error_free:
1032        free(mic->mic_virtblk.backend_file);
1033 _error_exit:
1034        return false;
1035}
1036
1037static void
1038close_backend(struct mic_info *mic)
1039{
1040        munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1041        close(mic->mic_virtblk.backend);
1042        free(mic->mic_virtblk.backend_file);
1043}
1044
1045static bool
1046start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1047{
1048        if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1049                mpsslog("%s: blk_config is not 8 byte aligned.\n",
1050                        mic->name);
1051                return false;
1052        }
1053        add_virtio_device(mic, &virtblk_dev_page.dd);
1054        if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1055                                  VIRTIO_ID_BLOCK, vring, NULL,
1056                                  virtblk_dev_page.dd.num_vq)) {
1057                mpsslog("%s init_vr failed %s\n",
1058                        mic->name, strerror(errno));
1059                return false;
1060        }
1061        return true;
1062}
1063
1064static void
1065stop_virtblk(struct mic_info *mic)
1066{
1067        int vr_size, ret;
1068
1069        vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1070                                         MIC_VIRTIO_RING_ALIGN) +
1071                             sizeof(struct _mic_vring_info));
1072        ret = munmap(mic->mic_virtblk.block_dp,
1073                MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1074        if (ret < 0)
1075                mpsslog("%s munmap errno %d\n", mic->name, errno);
1076        close(mic->mic_virtblk.virtio_block_fd);
1077}
1078
1079static __u8
1080header_error_check(struct vring_desc *desc)
1081{
1082        if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1083                mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1084                        __func__, __LINE__);
1085                return -EIO;
1086        }
1087        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1088                mpsslog("%s() %d: alone\n",
1089                        __func__, __LINE__);
1090                return -EIO;
1091        }
1092        if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1093                mpsslog("%s() %d: not read\n",
1094                        __func__, __LINE__);
1095                return -EIO;
1096        }
1097        return 0;
1098}
1099
1100static int
1101read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1102{
1103        struct iovec iovec;
1104        struct mic_copy_desc copy;
1105
1106        iovec.iov_len = sizeof(*hdr);
1107        iovec.iov_base = hdr;
1108        copy.iov = &iovec;
1109        copy.iovcnt = 1;
1110        copy.vr_idx = 0;  /* only one vring on virtio_block */
1111        copy.update_used = false;  /* do not update used index */
1112        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1113}
1114
1115static int
1116transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1117{
1118        struct mic_copy_desc copy;
1119
1120        copy.iov = iovec;
1121        copy.iovcnt = iovcnt;
1122        copy.vr_idx = 0;  /* only one vring on virtio_block */
1123        copy.update_used = false;  /* do not update used index */
1124        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1125}
1126
1127static __u8
1128status_error_check(struct vring_desc *desc)
1129{
1130        if (le32toh(desc->len) != sizeof(__u8)) {
1131                mpsslog("%s() %d: length is not sizeof(status)\n",
1132                        __func__, __LINE__);
1133                return -EIO;
1134        }
1135        return 0;
1136}
1137
1138static int
1139write_status(int fd, __u8 *status)
1140{
1141        struct iovec iovec;
1142        struct mic_copy_desc copy;
1143
1144        iovec.iov_base = status;
1145        iovec.iov_len = sizeof(*status);
1146        copy.iov = &iovec;
1147        copy.iovcnt = 1;
1148        copy.vr_idx = 0;  /* only one vring on virtio_block */
1149        copy.update_used = true; /* Update used index */
1150        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1151}
1152
/*
 * Fallback definition of the GET_ID request type, used only when the
 * included headers do not already provide it (presumably older
 * <linux/virtio_blk.h> versions - confirm).
 */
#ifndef VIRTIO_BLK_T_GET_ID
#define VIRTIO_BLK_T_GET_ID    8
#endif
1156
/*
 * virtio_block - thread body serving virtio block requests for one card.
 * @arg: the struct mic_info of the card.
 *
 * Outer loop: open the backing file (or idle until mic_virtblk.signaled is
 * set when there is none), add the block device, then serve requests until
 * mic_virtblk.signaled becomes non-zero, tear everything down and start
 * over. The flag is not set anywhere in this function; presumably another
 * thread or a signal handler sets it - confirm.
 *
 * Per request: read the header descriptor, build an iovec that points
 * straight into the mmap()ed backing file at hdr.sector * SECTOR_SIZE,
 * let the MIC_VIRTIO_COPY_DESC ioctl move the data, then write the status
 * byte, which also updates the used index.
 *
 * The outer loop has no exit, so the trailing pthread_exit() is not
 * reached.
 */
static void *
virtio_block(void *arg)
{
        struct mic_info *mic = (struct mic_info *)arg;
        int ret;
        struct pollfd block_poll;
        struct mic_vring vring;
        __u16 avail_idx;
        __u32 desc_idx;
        struct vring_desc *desc;
        struct iovec *iovec, *piov;
        __u8 status;
        __u32 buffer_desc_idx;
        struct virtio_blk_outhdr hdr;
        void *fos;

        for (;;) {  /* forever */
                if (!open_backend(mic)) { /* No virtblk */
                        /* Idle in 1 s steps until signaled, then retry. */
                        for (mic->mic_virtblk.signaled = 0;
                                !mic->mic_virtblk.signaled;)
                                sleep(1);
                        continue;
                }

                /* backend file is specified. */
                /*
                 * NOTE(review): on failure this skips stop_virtblk(), so a
                 * descriptor stored by add_virtio_device() is not closed
                 * on this path - confirm.
                 */
                if (!start_virtblk(mic, &vring))
                        goto _close_backend;
                /* One iovec slot per segment advertised in blk_config.seg_max. */
                iovec = malloc(sizeof(*iovec) *
                        le32toh(virtblk_dev_page.blk_config.seg_max));
                if (!iovec) {
                        mpsslog("%s: can't alloc iovec: %s\n",
                                mic->name, strerror(ENOMEM));
                        goto _stop_virtblk;
                }

                block_poll.fd = mic->mic_virtblk.virtio_block_fd;
                block_poll.events = POLLIN;
                for (mic->mic_virtblk.signaled = 0;
                     !mic->mic_virtblk.signaled;) {
                        block_poll.revents = 0;
                                        /* timeout in 1 sec to see signaled */
                        ret = poll(&block_poll, 1, 1000);
                        if (ret < 0) {
                                mpsslog("%s %d: poll failed: %s\n",
                                        __func__, __LINE__,
                                        strerror(errno));
                                continue;
                        }

                        if (!(block_poll.revents & POLLIN)) {
#ifdef DEBUG
                                mpsslog("%s %d: block_poll.revents=0x%x\n",
                                        __func__, __LINE__, block_poll.revents);
#endif
                                continue;
                        }

                        /* POLLIN */
                        /* Drain every request made available since the last pass. */
                        while (vring.info->avail_idx !=
                                le16toh(vring.vr.avail->idx)) {
                                /* read header element */
                                /* The mask assumes vring.vr.num is a power of two. */
                                avail_idx =
                                        vring.info->avail_idx &
                                        (vring.vr.num - 1);
                                desc_idx = le16toh(
                                        vring.vr.avail->ring[avail_idx]);
                                desc = &vring.vr.desc[desc_idx];
#ifdef DEBUG
                                mpsslog("%s() %d: avail_idx=%d ",
                                        __func__, __LINE__,
                                        vring.info->avail_idx);
                                mpsslog("vring.vr.num=%d desc=%p\n",
                                        vring.vr.num, desc);
#endif
                                /*
                                 * NOTE(review): this result is overwritten by
                                 * "status = 0" below before it is read, so a
                                 * malformed header is only logged.
                                 */
                                status = header_error_check(desc);
                                ret = read_header(
                                        mic->mic_virtblk.virtio_block_fd,
                                        &hdr, desc_idx);
                                if (ret < 0) {
                                        mpsslog("%s() %d %s: ret=%d %s\n",
                                                __func__, __LINE__,
                                                mic->name, ret,
                                                strerror(errno));
                                        break;
                                }
                                /* buffer element */
                                piov = iovec;
                                status = 0;
                                /* Host address of the first requested sector. */
                                fos = mic->mic_virtblk.backend_addr +
                                        (hdr.sector * SECTOR_SIZE);
                                buffer_desc_idx = next_desc(desc);
                                desc_idx = buffer_desc_idx;
                                /*
                                 * Walk the data descriptors; the loop stops at
                                 * the first one without F_NEXT, which is left
                                 * in desc for the status step below.
                                 * NOTE(review): flags/len are used without
                                 * le16toh()/le32toh() here, and piov is not
                                 * bounded by the seg_max-sized allocation, and
                                 * fos is not checked against backend_size -
                                 * confirm these are guaranteed elsewhere.
                                 */
                                for (desc = &vring.vr.desc[buffer_desc_idx];
                                     desc->flags & VRING_DESC_F_NEXT;
                                     desc_idx = next_desc(desc),
                                             desc = &vring.vr.desc[desc_idx]) {
                                        piov->iov_len = desc->len;
                                        piov->iov_base = fos;
                                        piov++;
                                        fos += desc->len;
                                }
                                /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
                                if (hdr.type & ~(VIRTIO_BLK_T_OUT |
                                        VIRTIO_BLK_T_GET_ID)) {
                                        /*
                                          VIRTIO_BLK_T_IN - does not do
                                          anything. Probably for documenting.
                                          VIRTIO_BLK_T_SCSI_CMD - for
                                          virtio_scsi.
                                          VIRTIO_BLK_T_FLUSH - turned off in
                                          config space.
                                          VIRTIO_BLK_T_BARRIER - defined but not
                                          used in anywhere.
                                        */
                                        mpsslog("%s() %d: type %x ",
                                                __func__, __LINE__,
                                                hdr.type);
                                        mpsslog("is not supported\n");
                                        status = -ENOTSUP;

                                } else {
                                        ret = transfer_blocks(
                                        mic->mic_virtblk.virtio_block_fd,
                                                iovec,
                                                piov - iovec);
                                        /*
                                         * NOTE(review): status is always 0 on
                                         * this path, so a failed transfer is
                                         * never recorded - the test looks
                                         * inverted; confirm.
                                         */
                                        if (ret < 0 &&
                                            status != 0)
                                                status = ret;
                                }
                                /* write status and update used pointer */
                                /*
                                 * NOTE(review): an already recorded error is
                                 * replaced by the result of the length check,
                                 * which is 0 for a one-byte descriptor -
                                 * confirm intent.
                                 */
                                if (status != 0)
                                        status = status_error_check(desc);
                                /* The result stored in ret is not examined. */
                                ret = write_status(
                                        mic->mic_virtblk.virtio_block_fd,
                                        &status);
#ifdef DEBUG
                                mpsslog("%s() %d: write status=%d on desc=%p\n",
                                        __func__, __LINE__,
                                        status, desc);
#endif
                        }
                }
                /* Signaled: tear down in reverse order of setup, then restart. */
                free(iovec);
_stop_virtblk:
                stop_virtblk(mic);
_close_backend:
                close_backend(mic);
        }  /* forever */

        pthread_exit(NULL);
}
1308
1309static void
1310reset(struct mic_info *mic)
1311{
1312#define RESET_TIMEOUT 120
1313        int i = RESET_TIMEOUT;
1314        setsysfs(mic->name, "state", "reset");
1315        while (i) {
1316                char *state;
1317                state = readsysfs(mic->name, "state");
1318                if (!state)
1319                        goto retry;
1320                mpsslog("%s: %s %d state %s\n",
1321                        mic->name, __func__, __LINE__, state);
1322
1323                if (!strcmp(state, "ready")) {
1324                        free(state);
1325                        break;
1326                }
1327                free(state);
1328retry:
1329                sleep(1);
1330                i--;
1331        }
1332}
1333
1334static int
1335get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1336{
1337        if (!strcmp(shutdown_status, "nop"))
1338                return MIC_NOP;
1339        if (!strcmp(shutdown_status, "crashed"))
1340                return MIC_CRASHED;
1341        if (!strcmp(shutdown_status, "halted"))
1342                return MIC_HALTED;
1343        if (!strcmp(shutdown_status, "poweroff"))
1344                return MIC_POWER_OFF;
1345        if (!strcmp(shutdown_status, "restart"))
1346                return MIC_RESTART;
1347        mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1348        /* Invalid state */
1349        assert(0);
1350};
1351
1352static int get_mic_state(struct mic_info *mic)
1353{
1354        char *state = NULL;
1355        enum mic_states mic_state;
1356
1357        while (!state) {
1358                state = readsysfs(mic->name, "state");
1359                sleep(1);
1360        }
1361        mpsslog("%s: %s %d state %s\n",
1362                mic->name, __func__, __LINE__, state);
1363
1364        if (!strcmp(state, "ready")) {
1365                mic_state = MIC_READY;
1366        } else if (!strcmp(state, "booting")) {
1367                mic_state = MIC_BOOTING;
1368        } else if (!strcmp(state, "online")) {
1369                mic_state = MIC_ONLINE;
1370        } else if (!strcmp(state, "shutting_down")) {
1371                mic_state = MIC_SHUTTING_DOWN;
1372        } else if (!strcmp(state, "reset_failed")) {
1373                mic_state = MIC_RESET_FAILED;
1374        } else if (!strcmp(state, "resetting")) {
1375                mic_state = MIC_RESETTING;
1376        } else {
1377                mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1378                assert(0);
1379        }
1380
1381        free(state);
1382        return mic_state;
1383};
1384
1385static void mic_handle_shutdown(struct mic_info *mic)
1386{
1387#define SHUTDOWN_TIMEOUT 60
1388        int i = SHUTDOWN_TIMEOUT;
1389        char *shutdown_status;
1390        while (i) {
1391                shutdown_status = readsysfs(mic->name, "shutdown_status");
1392                if (!shutdown_status) {
1393                        sleep(1);
1394                        continue;
1395                }
1396                mpsslog("%s: %s %d shutdown_status %s\n",
1397                        mic->name, __func__, __LINE__, shutdown_status);
1398                switch (get_mic_shutdown_status(mic, shutdown_status)) {
1399                case MIC_RESTART:
1400                        mic->restart = 1;
1401                case MIC_HALTED:
1402                case MIC_POWER_OFF:
1403                case MIC_CRASHED:
1404                        free(shutdown_status);
1405                        goto reset;
1406                default:
1407                        break;
1408                }
1409                free(shutdown_status);
1410                sleep(1);
1411                i--;
1412        }
1413reset:
1414        if (!i)
1415                mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1416                        mic->name, __func__, __LINE__, shutdown_status);
1417        reset(mic);
1418}
1419
1420static int open_state_fd(struct mic_info *mic)
1421{
1422        char pathname[PATH_MAX];
1423        int fd;
1424
1425        snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1426                 MICSYSFSDIR, mic->name, "state");
1427
1428        fd = open(pathname, O_RDONLY);
1429        if (fd < 0)
1430                mpsslog("%s: opening file %s failed %s\n",
1431                        mic->name, pathname, strerror(errno));
1432        return fd;
1433}
1434
1435static int block_till_state_change(int fd, struct mic_info *mic)
1436{
1437        struct pollfd ufds[1];
1438        char value[PAGE_SIZE];
1439        int ret;
1440
1441        ufds[0].fd = fd;
1442        ufds[0].events = POLLERR | POLLPRI;
1443        ret = poll(ufds, 1, -1);
1444        if (ret < 0) {
1445                mpsslog("%s: %s %d poll failed %s\n",
1446                        mic->name, __func__, __LINE__, strerror(errno));
1447                return ret;
1448        }
1449
1450        ret = lseek(fd, 0, SEEK_SET);
1451        if (ret < 0) {
1452                mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1453                        mic->name, __func__, __LINE__, strerror(errno));
1454                return ret;
1455        }
1456
1457        ret = read(fd, value, sizeof(value));
1458        if (ret < 0) {
1459                mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1460                        mic->name, __func__, __LINE__, strerror(errno));
1461                return ret;
1462        }
1463
1464        return 0;
1465}
1466
1467static void *
1468mic_config(void *arg)
1469{
1470        struct mic_info *mic = (struct mic_info *)arg;
1471        int fd, ret, stat = 0;
1472
1473        fd = open_state_fd(mic);
1474        if (fd < 0) {
1475                mpsslog("%s: %s %d open state fd failed %s\n",
1476                        mic->name, __func__, __LINE__, strerror(errno));
1477                goto exit;
1478        }
1479
1480        do {
1481                ret = block_till_state_change(fd, mic);
1482                if (ret < 0) {
1483                        mpsslog("%s: %s %d block_till_state_change error %s\n",
1484                                mic->name, __func__, __LINE__, strerror(errno));
1485                        goto close_exit;
1486                }
1487
1488                switch (get_mic_state(mic)) {
1489                case MIC_SHUTTING_DOWN:
1490                        mic_handle_shutdown(mic);
1491                        break;
1492                case MIC_READY:
1493                case MIC_RESET_FAILED:
1494                        ret = kill(mic->pid, SIGTERM);
1495                        mpsslog("%s: %s %d kill pid %d ret %d\n",
1496                                mic->name, __func__, __LINE__,
1497                                mic->pid, ret);
1498                        if (!ret) {
1499                                ret = waitpid(mic->pid, &stat,
1500                                              WIFSIGNALED(stat));
1501                                mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1502                                        mic->name, __func__, __LINE__,
1503                                        ret, mic->pid);
1504                        }
1505                        if (mic->boot_on_resume) {
1506                                setsysfs(mic->name, "state", "boot");
1507                                mic->boot_on_resume = 0;
1508                        }
1509                        goto close_exit;
1510                default:
1511                        break;
1512                }
1513        } while (1);
1514
1515close_exit:
1516        close(fd);
1517exit:
1518        init_mic(mic);
1519        pthread_exit(NULL);
1520}
1521
1522static void
1523set_cmdline(struct mic_info *mic)
1524{
1525        char buffer[PATH_MAX];
1526        int len;
1527
1528        len = snprintf(buffer, PATH_MAX,
1529                "clocksource=tsc highres=off nohz=off ");
1530        len += snprintf(buffer + len, PATH_MAX - len,
1531                "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1532        len += snprintf(buffer + len, PATH_MAX - len,
1533                "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1534                mic->id + 1);
1535
1536        setsysfs(mic->name, "cmdline", buffer);
1537        mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1538        snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1539        mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1540}
1541
1542static void
1543set_log_buf_info(struct mic_info *mic)
1544{
1545        int fd;
1546        off_t len;
1547        char system_map[] = "/lib/firmware/mic/System.map";
1548        char *map, *temp, log_buf[17] = {'\0'};
1549
1550        fd = open(system_map, O_RDONLY);
1551        if (fd < 0) {
1552                mpsslog("%s: Opening System.map failed: %d\n",
1553                        mic->name, errno);
1554                return;
1555        }
1556        len = lseek(fd, 0, SEEK_END);
1557        if (len < 0) {
1558                mpsslog("%s: Reading System.map size failed: %d\n",
1559                        mic->name, errno);
1560                close(fd);
1561                return;
1562        }
1563        map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1564        if (map == MAP_FAILED) {
1565                mpsslog("%s: mmap of System.map failed: %d\n",
1566                        mic->name, errno);
1567                close(fd);
1568                return;
1569        }
1570        temp = strstr(map, "__log_buf");
1571        if (!temp) {
1572                mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1573                munmap(map, len);
1574                close(fd);
1575                return;
1576        }
1577        strncpy(log_buf, temp - 19, 16);
1578        setsysfs(mic->name, "log_buf_addr", log_buf);
1579        mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1580        temp = strstr(map, "log_buf_len");
1581        if (!temp) {
1582                mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1583                munmap(map, len);
1584                close(fd);
1585                return;
1586        }
1587        strncpy(log_buf, temp - 19, 16);
1588        setsysfs(mic->name, "log_buf_len", log_buf);
1589        mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1590        munmap(map, len);
1591        close(fd);
1592}
1593
1594static void
1595change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1596{
1597        struct mic_info *mic;
1598
1599        for (mic = mic_list.next; mic != NULL; mic = mic->next)
1600                mic->mic_virtblk.signaled = 1/* true */;
1601}
1602
/*
 * Publish the boot-time parameters of one card: first the log buffer
 * information, then the kernel command line.
 */
static void set_mic_boot_params(struct mic_info *mic)
{
        set_log_buf_info(mic);  /* log_buf_addr / log_buf_len */
        set_cmdline(mic);       /* cmdline */
}
1609
1610static void *
1611init_mic(void *arg)
1612{
1613        struct mic_info *mic = (struct mic_info *)arg;
1614        struct sigaction ignore = {
1615                .sa_flags = 0,
1616                .sa_handler = SIG_IGN
1617        };
1618        struct sigaction act = {
1619                .sa_flags = SA_SIGINFO,
1620                .sa_sigaction = change_virtblk_backend,
1621        };
1622        char buffer[PATH_MAX];
1623        int err, fd;
1624
1625        /*
1626         * Currently, one virtio block device is supported for each MIC card
1627         * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1628         * The signal informs the virtio block backend about a change in the
1629         * configuration file which specifies the virtio backend file name on
1630         * the host. Virtio block backend then re-reads the configuration file
1631         * and switches to the new block device. This signalling mechanism may
1632         * not be required once multiple virtio block devices are supported by
1633         * the MIC daemon.
1634         */
1635        sigaction(SIGUSR1, &ignore, NULL);
1636retry:
1637        fd = open_state_fd(mic);
1638        if (fd < 0) {
1639                mpsslog("%s: %s %d open state fd failed %s\n",
1640                        mic->name, __func__, __LINE__, strerror(errno));
1641                sleep(2);
1642                goto retry;
1643        }
1644
1645        if (mic->restart) {
1646                snprintf(buffer, PATH_MAX, "boot");
1647                setsysfs(mic->name, "state", buffer);
1648                mpsslog("%s restarting mic %d\n",
1649                        mic->name, mic->restart);
1650                mic->restart = 0;
1651        }
1652
1653        while (1) {
1654                while (block_till_state_change(fd, mic)) {
1655                        mpsslog("%s: %s %d block_till_state_change error %s\n",
1656                                mic->name, __func__, __LINE__, strerror(errno));
1657                        sleep(2);
1658                        continue;
1659                }
1660
1661                if (get_mic_state(mic) == MIC_BOOTING)
1662                        break;
1663        }
1664
1665        mic->pid = fork();
1666        switch (mic->pid) {
1667        case 0:
1668                add_virtio_device(mic, &virtcons_dev_page.dd);
1669                add_virtio_device(mic, &virtnet_dev_page.dd);
1670                err = pthread_create(&mic->mic_console.console_thread, NULL,
1671                        virtio_console, mic);
1672                if (err)
1673                        mpsslog("%s virtcons pthread_create failed %s\n",
1674                                mic->name, strerror(err));
1675                err = pthread_create(&mic->mic_net.net_thread, NULL,
1676                        virtio_net, mic);
1677                if (err)
1678                        mpsslog("%s virtnet pthread_create failed %s\n",
1679                                mic->name, strerror(err));
1680                err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1681                        virtio_block, mic);
1682                if (err)
1683                        mpsslog("%s virtblk pthread_create failed %s\n",
1684                                mic->name, strerror(err));
1685                sigemptyset(&act.sa_mask);
1686                err = sigaction(SIGUSR1, &act, NULL);
1687                if (err)
1688                        mpsslog("%s sigaction SIGUSR1 failed %s\n",
1689                                mic->name, strerror(errno));
1690                while (1)
1691                        sleep(60);
1692        case -1:
1693                mpsslog("fork failed MIC name %s id %d errno %d\n",
1694                        mic->name, mic->id, errno);
1695                break;
1696        default:
1697                err = pthread_create(&mic->config_thread, NULL,
1698                                     mic_config, mic);
1699                if (err)
1700                        mpsslog("%s mic_config pthread_create failed %s\n",
1701                                mic->name, strerror(err));
1702        }
1703
1704        return NULL;
1705}
1706
1707static void
1708start_daemon(void)
1709{
1710        struct mic_info *mic;
1711        int err;
1712
1713        for (mic = mic_list.next; mic; mic = mic->next) {
1714                set_mic_boot_params(mic);
1715                err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1716                if (err)
1717                        mpsslog("%s init_mic pthread_create failed %s\n",
1718                                mic->name, strerror(err));
1719        }
1720
1721        while (1)
1722                sleep(60);
1723}
1724
1725static int
1726init_mic_list(void)
1727{
1728        struct mic_info *mic = &mic_list;
1729        struct dirent *file;
1730        DIR *dp;
1731        int cnt = 0;
1732
1733        dp = opendir(MICSYSFSDIR);
1734        if (!dp)
1735                return 0;
1736
1737        while ((file = readdir(dp)) != NULL) {
1738                if (!strncmp(file->d_name, "mic", 3)) {
1739                        mic->next = calloc(1, sizeof(struct mic_info));
1740                        if (mic->next) {
1741                                mic = mic->next;
1742                                mic->id = atoi(&file->d_name[3]);
1743                                mic->name = malloc(strlen(file->d_name) + 16);
1744                                if (mic->name)
1745                                        strcpy(mic->name, file->d_name);
1746                                mpsslog("MIC name %s id %d\n", mic->name,
1747                                        mic->id);
1748                                cnt++;
1749                        }
1750                }
1751        }
1752
1753        closedir(dp);
1754        return cnt;
1755}
1756
1757void
1758mpsslog(char *format, ...)
1759{
1760        va_list args;
1761        char buffer[4096];
1762        char ts[52], *ts1;
1763        time_t t;
1764
1765        if (logfp == NULL)
1766                return;
1767
1768        va_start(args, format);
1769        vsprintf(buffer, format, args);
1770        va_end(args);
1771
1772        time(&t);
1773        ts1 = ctime_r(&t, ts);
1774        ts1[strlen(ts1) - 1] = '\0';
1775        fprintf(logfp, "%s: %s", ts1, buffer);
1776
1777        fflush(logfp);
1778}
1779
1780int
1781main(int argc, char *argv[])
1782{
1783        int cnt;
1784        pid_t pid;
1785
1786        myname = argv[0];
1787
1788        logfp = fopen(LOGFILE_NAME, "a+");
1789        if (!logfp) {
1790                fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1791                exit(1);
1792        }
1793        pid = fork();
1794        switch (pid) {
1795        case 0:
1796                break;
1797        case -1:
1798                exit(2);
1799        default:
1800                exit(0);
1801        }
1802
1803        mpsslog("MIC Daemon start\n");
1804
1805        cnt = init_mic_list();
1806        if (cnt == 0) {
1807                mpsslog("MIC module not loaded\n");
1808                exit(3);
1809        }
1810        mpsslog("MIC found %d devices\n", cnt);
1811
1812        start_daemon();
1813
1814        exit(0);
1815}
1816