/* linux/Documentation/mic/mpssd/mpssd.c */
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2013 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * The full GNU General Public License is included in this distribution in
  16 * the file called "COPYING".
  17 *
  18 * Intel MIC User Space Tools.
  19 */
  20
  21#define _GNU_SOURCE
  22
  23#include <stdlib.h>
  24#include <fcntl.h>
  25#include <getopt.h>
  26#include <assert.h>
  27#include <unistd.h>
  28#include <stdbool.h>
  29#include <signal.h>
  30#include <poll.h>
  31#include <features.h>
  32#include <sys/types.h>
  33#include <sys/stat.h>
  34#include <sys/mman.h>
  35#include <sys/socket.h>
  36#include <linux/virtio_ring.h>
  37#include <linux/virtio_net.h>
  38#include <linux/virtio_console.h>
  39#include <linux/virtio_blk.h>
  40#include <linux/version.h>
  41#include "mpssd.h"
  42#include <linux/mic_ioctl.h>
  43#include <linux/mic_common.h>
  44#include <tools/endian.h>
  45
  46static void init_mic(struct mic_info *mic);
  47
  48static FILE *logfp;
  49static struct mic_info mic_list;
  50
  51#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  52
  53#define min_t(type, x, y) ({                            \
  54                type __min1 = (x);                      \
  55                type __min2 = (y);                      \
  56                __min1 < __min2 ? __min1 : __min2; })
  57
  58/* align addr on a size boundary - adjust address up/down if needed */
  59#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
  60#define _ALIGN_UP(addr, size)    _ALIGN_DOWN(addr + size - 1, size)
  61
  62/* align addr on a size boundary - adjust address up if needed */
  63#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
  64
  65/* to align the pointer to the (next) page boundary */
  66#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
  67
  68#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  69
  70#define GSO_ENABLED             1
  71#define MAX_GSO_SIZE            (64 * 1024)
  72#define ETH_H_LEN               14
  73#define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  74#define MIC_DEVICE_PAGE_END     0x1000
  75
  76#ifndef VIRTIO_NET_HDR_F_DATA_VALID
  77#define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
  78#endif
  79
  80static struct {
  81        struct mic_device_desc dd;
  82        struct mic_vqconfig vqconfig[2];
  83        __u32 host_features, guest_acknowledgements;
  84        struct virtio_console_config cons_config;
  85} virtcons_dev_page = {
  86        .dd = {
  87                .type = VIRTIO_ID_CONSOLE,
  88                .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  89                .feature_len = sizeof(virtcons_dev_page.host_features),
  90                .config_len = sizeof(virtcons_dev_page.cons_config),
  91        },
  92        .vqconfig[0] = {
  93                .num = htole16(MIC_VRING_ENTRIES),
  94        },
  95        .vqconfig[1] = {
  96                .num = htole16(MIC_VRING_ENTRIES),
  97        },
  98};
  99
 100static struct {
 101        struct mic_device_desc dd;
 102        struct mic_vqconfig vqconfig[2];
 103        __u32 host_features, guest_acknowledgements;
 104        struct virtio_net_config net_config;
 105} virtnet_dev_page = {
 106        .dd = {
 107                .type = VIRTIO_ID_NET,
 108                .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
 109                .feature_len = sizeof(virtnet_dev_page.host_features),
 110                .config_len = sizeof(virtnet_dev_page.net_config),
 111        },
 112        .vqconfig[0] = {
 113                .num = htole16(MIC_VRING_ENTRIES),
 114        },
 115        .vqconfig[1] = {
 116                .num = htole16(MIC_VRING_ENTRIES),
 117        },
 118#if GSO_ENABLED
 119                .host_features = htole32(
 120                1 << VIRTIO_NET_F_CSUM |
 121                1 << VIRTIO_NET_F_GSO |
 122                1 << VIRTIO_NET_F_GUEST_TSO4 |
 123                1 << VIRTIO_NET_F_GUEST_TSO6 |
 124                1 << VIRTIO_NET_F_GUEST_ECN |
 125                1 << VIRTIO_NET_F_GUEST_UFO),
 126#else
 127                .host_features = 0,
 128#endif
 129};
 130
 131static const char *mic_config_dir = "/etc/sysconfig/mic";
 132static const char *virtblk_backend = "VIRTBLK_BACKEND";
 133static struct {
 134        struct mic_device_desc dd;
 135        struct mic_vqconfig vqconfig[1];
 136        __u32 host_features, guest_acknowledgements;
 137        struct virtio_blk_config blk_config;
 138} virtblk_dev_page = {
 139        .dd = {
 140                .type = VIRTIO_ID_BLOCK,
 141                .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
 142                .feature_len = sizeof(virtblk_dev_page.host_features),
 143                .config_len = sizeof(virtblk_dev_page.blk_config),
 144        },
 145        .vqconfig[0] = {
 146                .num = htole16(MIC_VRING_ENTRIES),
 147        },
 148        .host_features =
 149                htole32(1<<VIRTIO_BLK_F_SEG_MAX),
 150        .blk_config = {
 151                .seg_max = htole32(MIC_VRING_ENTRIES - 2),
 152                .capacity = htole64(0),
 153         }
 154};
 155
 156static char *myname;
 157
 158static int
 159tap_configure(struct mic_info *mic, char *dev)
 160{
 161        pid_t pid;
 162        char *ifargv[7];
 163        char ipaddr[IFNAMSIZ];
 164        int ret = 0;
 165
 166        pid = fork();
 167        if (pid == 0) {
 168                ifargv[0] = "ip";
 169                ifargv[1] = "link";
 170                ifargv[2] = "set";
 171                ifargv[3] = dev;
 172                ifargv[4] = "up";
 173                ifargv[5] = NULL;
 174                mpsslog("Configuring %s\n", dev);
 175                ret = execvp("ip", ifargv);
 176                if (ret < 0) {
 177                        mpsslog("%s execvp failed errno %s\n",
 178                                mic->name, strerror(errno));
 179                        return ret;
 180                }
 181        }
 182        if (pid < 0) {
 183                mpsslog("%s fork failed errno %s\n",
 184                        mic->name, strerror(errno));
 185                return ret;
 186        }
 187
 188        ret = waitpid(pid, NULL, 0);
 189        if (ret < 0) {
 190                mpsslog("%s waitpid failed errno %s\n",
 191                        mic->name, strerror(errno));
 192                return ret;
 193        }
 194
 195        snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
 196
 197        pid = fork();
 198        if (pid == 0) {
 199                ifargv[0] = "ip";
 200                ifargv[1] = "addr";
 201                ifargv[2] = "add";
 202                ifargv[3] = ipaddr;
 203                ifargv[4] = "dev";
 204                ifargv[5] = dev;
 205                ifargv[6] = NULL;
 206                mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
 207                ret = execvp("ip", ifargv);
 208                if (ret < 0) {
 209                        mpsslog("%s execvp failed errno %s\n",
 210                                mic->name, strerror(errno));
 211                        return ret;
 212                }
 213        }
 214        if (pid < 0) {
 215                mpsslog("%s fork failed errno %s\n",
 216                        mic->name, strerror(errno));
 217                return ret;
 218        }
 219
 220        ret = waitpid(pid, NULL, 0);
 221        if (ret < 0) {
 222                mpsslog("%s waitpid failed errno %s\n",
 223                        mic->name, strerror(errno));
 224                return ret;
 225        }
 226        mpsslog("MIC name %s %s %d DONE!\n",
 227                mic->name, __func__, __LINE__);
 228        return 0;
 229}
 230
 231static int tun_alloc(struct mic_info *mic, char *dev)
 232{
 233        struct ifreq ifr;
 234        int fd, err;
 235#if GSO_ENABLED
 236        unsigned offload;
 237#endif
 238        fd = open("/dev/net/tun", O_RDWR);
 239        if (fd < 0) {
 240                mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
 241                goto done;
 242        }
 243
 244        memset(&ifr, 0, sizeof(ifr));
 245
 246        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 247        if (*dev)
 248                strncpy(ifr.ifr_name, dev, IFNAMSIZ);
 249
 250        err = ioctl(fd, TUNSETIFF, (void *)&ifr);
 251        if (err < 0) {
 252                mpsslog("%s %s %d TUNSETIFF failed %s\n",
 253                        mic->name, __func__, __LINE__, strerror(errno));
 254                close(fd);
 255                return err;
 256        }
 257#if GSO_ENABLED
 258        offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
 259                TUN_F_TSO_ECN | TUN_F_UFO;
 260
 261        err = ioctl(fd, TUNSETOFFLOAD, offload);
 262        if (err < 0) {
 263                mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
 264                        mic->name, __func__, __LINE__, strerror(errno));
 265                close(fd);
 266                return err;
 267        }
 268#endif
 269        strcpy(dev, ifr.ifr_name);
 270        mpsslog("Created TAP %s\n", dev);
 271done:
 272        return fd;
 273}
 274
 275#define NET_FD_VIRTIO_NET 0
 276#define NET_FD_TUN 1
 277#define MAX_NET_FD 2
 278
 279static void set_dp(struct mic_info *mic, int type, void *dp)
 280{
 281        switch (type) {
 282        case VIRTIO_ID_CONSOLE:
 283                mic->mic_console.console_dp = dp;
 284                return;
 285        case VIRTIO_ID_NET:
 286                mic->mic_net.net_dp = dp;
 287                return;
 288        case VIRTIO_ID_BLOCK:
 289                mic->mic_virtblk.block_dp = dp;
 290                return;
 291        }
 292        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 293        assert(0);
 294}
 295
 296static void *get_dp(struct mic_info *mic, int type)
 297{
 298        switch (type) {
 299        case VIRTIO_ID_CONSOLE:
 300                return mic->mic_console.console_dp;
 301        case VIRTIO_ID_NET:
 302                return mic->mic_net.net_dp;
 303        case VIRTIO_ID_BLOCK:
 304                return mic->mic_virtblk.block_dp;
 305        }
 306        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 307        assert(0);
 308        return NULL;
 309}
 310
 311static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
 312{
 313        struct mic_device_desc *d;
 314        int i;
 315        void *dp = get_dp(mic, type);
 316
 317        for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
 318                i += mic_total_desc_size(d)) {
 319                d = dp + i;
 320
 321                /* End of list */
 322                if (d->type == 0)
 323                        break;
 324
 325                if (d->type == -1)
 326                        continue;
 327
 328                mpsslog("%s %s d-> type %d d %p\n",
 329                        mic->name, __func__, d->type, d);
 330
 331                if (d->type == (__u8)type)
 332                        return d;
 333        }
 334        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 335        assert(0);
 336        return NULL;
 337}
 338
 339/* See comments in vhost.c for explanation of next_desc() */
 340static unsigned next_desc(struct vring_desc *desc)
 341{
 342        unsigned int next;
 343
 344        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
 345                return -1U;
 346        next = le16toh(desc->next);
 347        return next;
 348}
 349
 350/* Sum up all the IOVEC length */
 351static ssize_t
 352sum_iovec_len(struct mic_copy_desc *copy)
 353{
 354        ssize_t sum = 0;
 355        int i;
 356
 357        for (i = 0; i < copy->iovcnt; i++)
 358                sum += copy->iov[i].iov_len;
 359        return sum;
 360}
 361
 362static inline void verify_out_len(struct mic_info *mic,
 363        struct mic_copy_desc *copy)
 364{
 365        if (copy->out_len != sum_iovec_len(copy)) {
 366                mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
 367                        mic->name, __func__, __LINE__,
 368                        copy->out_len, sum_iovec_len(copy));
 369                assert(copy->out_len == sum_iovec_len(copy));
 370        }
 371}
 372
 373/* Display an iovec */
 374static void
 375disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
 376           const char *s, int line)
 377{
 378        int i;
 379
 380        for (i = 0; i < copy->iovcnt; i++)
 381                mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
 382                        mic->name, s, line, i,
 383                        copy->iov[i].iov_base, copy->iov[i].iov_len);
 384}
 385
 386static inline __u16 read_avail_idx(struct mic_vring *vr)
 387{
 388        return ACCESS_ONCE(vr->info->avail_idx);
 389}
 390
 391static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
 392                                struct mic_copy_desc *copy, ssize_t len)
 393{
 394        copy->vr_idx = tx ? 0 : 1;
 395        copy->update_used = true;
 396        if (type == VIRTIO_ID_NET)
 397                copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
 398        else
 399                copy->iov[0].iov_len = len;
 400}
 401
 402/* Central API which triggers the copies */
 403static int
 404mic_virtio_copy(struct mic_info *mic, int fd,
 405                struct mic_vring *vr, struct mic_copy_desc *copy)
 406{
 407        int ret;
 408
 409        ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
 410        if (ret) {
 411                mpsslog("%s %s %d errno %s ret %d\n",
 412                        mic->name, __func__, __LINE__,
 413                        strerror(errno), ret);
 414        }
 415        return ret;
 416}
 417
 418/*
 419 * This initialization routine requires at least one
 420 * vring i.e. vr0. vr1 is optional.
 421 */
 422static void *
 423init_vr(struct mic_info *mic, int fd, int type,
 424        struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
 425{
 426        int vr_size;
 427        char *va;
 428
 429        vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
 430                MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
 431        va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
 432                PROT_READ, MAP_SHARED, fd, 0);
 433        if (MAP_FAILED == va) {
 434                mpsslog("%s %s %d mmap failed errno %s\n",
 435                        mic->name, __func__, __LINE__,
 436                        strerror(errno));
 437                goto done;
 438        }
 439        set_dp(mic, type, va);
 440        vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
 441        vr0->info = vr0->va +
 442                vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
 443        vring_init(&vr0->vr,
 444                   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
 445        mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
 446                __func__, mic->name, vr0->va, vr0->info, vr_size,
 447                vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 448        mpsslog("magic 0x%x expected 0x%x\n",
 449                le32toh(vr0->info->magic), MIC_MAGIC + type);
 450        assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
 451        if (vr1) {
 452                vr1->va = (struct mic_vring *)
 453                        &va[MIC_DEVICE_PAGE_END + vr_size];
 454                vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
 455                        MIC_VIRTIO_RING_ALIGN);
 456                vring_init(&vr1->vr,
 457                           MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
 458                mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
 459                        __func__, mic->name, vr1->va, vr1->info, vr_size,
 460                        vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 461                mpsslog("magic 0x%x expected 0x%x\n",
 462                        le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
 463                assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
 464        }
 465done:
 466        return va;
 467}
 468
 469static void
 470wait_for_card_driver(struct mic_info *mic, int fd, int type)
 471{
 472        struct pollfd pollfd;
 473        int err;
 474        struct mic_device_desc *desc = get_device_desc(mic, type);
 475
 476        pollfd.fd = fd;
 477        mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
 478                mic->name, __func__, type, desc->status);
 479        while (1) {
 480                pollfd.events = POLLIN;
 481                pollfd.revents = 0;
 482                err = poll(&pollfd, 1, -1);
 483                if (err < 0) {
 484                        mpsslog("%s %s poll failed %s\n",
 485                                mic->name, __func__, strerror(errno));
 486                        continue;
 487                }
 488
 489                if (pollfd.revents) {
 490                        mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
 491                                mic->name, __func__, type, desc->status);
 492                        if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
 493                                mpsslog("%s %s poll.revents %d\n",
 494                                        mic->name, __func__, pollfd.revents);
 495                                mpsslog("%s %s desc-> type %d status 0x%x\n",
 496                                        mic->name, __func__, type,
 497                                        desc->status);
 498                                break;
 499                        }
 500                }
 501        }
 502}
 503
 504/* Spin till we have some descriptors */
 505static void
 506spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
 507{
 508        __u16 avail_idx = read_avail_idx(vr);
 509
 510        while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
 511#ifdef DEBUG
 512                mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
 513                        mic->name, __func__,
 514                        le16toh(vr->vr.avail->idx), vr->info->avail_idx);
 515#endif
 516                sched_yield();
 517        }
 518}
 519
 520static void *
 521virtio_net(void *arg)
 522{
 523        static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
 524        static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
 525        struct iovec vnet_iov[2][2] = {
 526                { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
 527                  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
 528                { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
 529                  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
 530        };
 531        struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
 532        struct mic_info *mic = (struct mic_info *)arg;
 533        char if_name[IFNAMSIZ];
 534        struct pollfd net_poll[MAX_NET_FD];
 535        struct mic_vring tx_vr, rx_vr;
 536        struct mic_copy_desc copy;
 537        struct mic_device_desc *desc;
 538        int err;
 539
 540        snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
 541        mic->mic_net.tap_fd = tun_alloc(mic, if_name);
 542        if (mic->mic_net.tap_fd < 0)
 543                goto done;
 544
 545        if (tap_configure(mic, if_name))
 546                goto done;
 547        mpsslog("MIC name %s id %d\n", mic->name, mic->id);
 548
 549        net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
 550        net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
 551        net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
 552        net_poll[NET_FD_TUN].events = POLLIN;
 553
 554        if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
 555                                  VIRTIO_ID_NET, &tx_vr, &rx_vr,
 556                virtnet_dev_page.dd.num_vq)) {
 557                mpsslog("%s init_vr failed %s\n",
 558                        mic->name, strerror(errno));
 559                goto done;
 560        }
 561
 562        copy.iovcnt = 2;
 563        desc = get_device_desc(mic, VIRTIO_ID_NET);
 564
 565        while (1) {
 566                ssize_t len;
 567
 568                net_poll[NET_FD_VIRTIO_NET].revents = 0;
 569                net_poll[NET_FD_TUN].revents = 0;
 570
 571                /* Start polling for data from tap and virtio net */
 572                err = poll(net_poll, 2, -1);
 573                if (err < 0) {
 574                        mpsslog("%s poll failed %s\n",
 575                                __func__, strerror(errno));
 576                        continue;
 577                }
 578                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
 579                        wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
 580                                             VIRTIO_ID_NET);
 581                /*
 582                 * Check if there is data to be read from TUN and write to
 583                 * virtio net fd if there is.
 584                 */
 585                if (net_poll[NET_FD_TUN].revents & POLLIN) {
 586                        copy.iov = iov0;
 587                        len = readv(net_poll[NET_FD_TUN].fd,
 588                                copy.iov, copy.iovcnt);
 589                        if (len > 0) {
 590                                struct virtio_net_hdr *hdr
 591                                        = (struct virtio_net_hdr *)vnet_hdr[0];
 592
 593                                /* Disable checksums on the card since we are on
 594                                   a reliable PCIe link */
 595                                hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
 596#ifdef DEBUG
 597                                mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
 598                                        __func__, __LINE__, hdr->flags);
 599                                mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
 600                                        copy.out_len, hdr->gso_type);
 601#endif
 602#ifdef DEBUG
 603                                disp_iovec(mic, copy, __func__, __LINE__);
 604                                mpsslog("%s %s %d read from tap 0x%lx\n",
 605                                        mic->name, __func__, __LINE__,
 606                                        len);
 607#endif
 608                                spin_for_descriptors(mic, &tx_vr);
 609                                txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
 610                                             len);
 611
 612                                err = mic_virtio_copy(mic,
 613                                        mic->mic_net.virtio_net_fd, &tx_vr,
 614                                        &copy);
 615                                if (err < 0) {
 616                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 617                                                mic->name, __func__, __LINE__,
 618                                                strerror(errno));
 619                                }
 620                                if (!err)
 621                                        verify_out_len(mic, &copy);
 622#ifdef DEBUG
 623                                disp_iovec(mic, copy, __func__, __LINE__);
 624                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 625                                        mic->name, __func__, __LINE__,
 626                                        sum_iovec_len(&copy));
 627#endif
 628                                /* Reinitialize IOV for next run */
 629                                iov0[1].iov_len = MAX_NET_PKT_SIZE;
 630                        } else if (len < 0) {
 631                                disp_iovec(mic, &copy, __func__, __LINE__);
 632                                mpsslog("%s %s %d read failed %s ", mic->name,
 633                                        __func__, __LINE__, strerror(errno));
 634                                mpsslog("cnt %d sum %zd\n",
 635                                        copy.iovcnt, sum_iovec_len(&copy));
 636                        }
 637                }
 638
 639                /*
 640                 * Check if there is data to be read from virtio net and
 641                 * write to TUN if there is.
 642                 */
 643                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
 644                        while (rx_vr.info->avail_idx !=
 645                                le16toh(rx_vr.vr.avail->idx)) {
 646                                copy.iov = iov1;
 647                                txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
 648                                             MAX_NET_PKT_SIZE
 649                                        + sizeof(struct virtio_net_hdr));
 650
 651                                err = mic_virtio_copy(mic,
 652                                        mic->mic_net.virtio_net_fd, &rx_vr,
 653                                        &copy);
 654                                if (!err) {
 655#ifdef DEBUG
 656                                        struct virtio_net_hdr *hdr
 657                                                = (struct virtio_net_hdr *)
 658                                                        vnet_hdr[1];
 659
 660                                        mpsslog("%s %s %d hdr->flags 0x%x, ",
 661                                                mic->name, __func__, __LINE__,
 662                                                hdr->flags);
 663                                        mpsslog("out_len %d gso_type 0x%x\n",
 664                                                copy.out_len,
 665                                                hdr->gso_type);
 666#endif
 667                                        /* Set the correct output iov_len */
 668                                        iov1[1].iov_len = copy.out_len -
 669                                                sizeof(struct virtio_net_hdr);
 670                                        verify_out_len(mic, &copy);
 671#ifdef DEBUG
 672                                        disp_iovec(mic, copy, __func__,
 673                                                   __LINE__);
 674                                        mpsslog("%s %s %d ",
 675                                                mic->name, __func__, __LINE__);
 676                                        mpsslog("read from net 0x%lx\n",
 677                                                sum_iovec_len(copy));
 678#endif
 679                                        len = writev(net_poll[NET_FD_TUN].fd,
 680                                                copy.iov, copy.iovcnt);
 681                                        if (len != sum_iovec_len(&copy)) {
 682                                                mpsslog("Tun write failed %s ",
 683                                                        strerror(errno));
 684                                                mpsslog("len 0x%zx ", len);
 685                                                mpsslog("read_len 0x%zx\n",
 686                                                        sum_iovec_len(&copy));
 687                                        } else {
 688#ifdef DEBUG
 689                                                disp_iovec(mic, &copy, __func__,
 690                                                           __LINE__);
 691                                                mpsslog("%s %s %d ",
 692                                                        mic->name, __func__,
 693                                                        __LINE__);
 694                                                mpsslog("wrote to tap 0x%lx\n",
 695                                                        len);
 696#endif
 697                                        }
 698                                } else {
 699                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 700                                                mic->name, __func__, __LINE__,
 701                                                strerror(errno));
 702                                        break;
 703                                }
 704                        }
 705                }
 706                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 707                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 708        }
 709done:
 710        pthread_exit(NULL);
 711}
 712
 713/* virtio_console */
 714#define VIRTIO_CONSOLE_FD 0
 715#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
 716#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
 717#define MAX_BUFFER_SIZE PAGE_SIZE
 718
 719static void *
 720virtio_console(void *arg)
 721{
 722        static __u8 vcons_buf[2][PAGE_SIZE];
 723        struct iovec vcons_iov[2] = {
 724                { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
 725                { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
 726        };
 727        struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
 728        struct mic_info *mic = (struct mic_info *)arg;
 729        int err;
 730        struct pollfd console_poll[MAX_CONSOLE_FD];
 731        int pty_fd;
 732        char *pts_name;
 733        ssize_t len;
 734        struct mic_vring tx_vr, rx_vr;
 735        struct mic_copy_desc copy;
 736        struct mic_device_desc *desc;
 737
 738        pty_fd = posix_openpt(O_RDWR);
 739        if (pty_fd < 0) {
 740                mpsslog("can't open a pseudoterminal master device: %s\n",
 741                        strerror(errno));
 742                goto _return;
 743        }
 744        pts_name = ptsname(pty_fd);
 745        if (pts_name == NULL) {
 746                mpsslog("can't get pts name\n");
 747                goto _close_pty;
 748        }
 749        printf("%s console message goes to %s\n", mic->name, pts_name);
 750        mpsslog("%s console message goes to %s\n", mic->name, pts_name);
 751        err = grantpt(pty_fd);
 752        if (err < 0) {
 753                mpsslog("can't grant access: %s %s\n",
 754                        pts_name, strerror(errno));
 755                goto _close_pty;
 756        }
 757        err = unlockpt(pty_fd);
 758        if (err < 0) {
 759                mpsslog("can't unlock a pseudoterminal: %s %s\n",
 760                        pts_name, strerror(errno));
 761                goto _close_pty;
 762        }
 763        console_poll[MONITOR_FD].fd = pty_fd;
 764        console_poll[MONITOR_FD].events = POLLIN;
 765
 766        console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
 767        console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
 768
 769        if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
 770                                  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
 771                virtcons_dev_page.dd.num_vq)) {
 772                mpsslog("%s init_vr failed %s\n",
 773                        mic->name, strerror(errno));
 774                goto _close_pty;
 775        }
 776
 777        copy.iovcnt = 1;
 778        desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
 779
 780        for (;;) {
 781                console_poll[MONITOR_FD].revents = 0;
 782                console_poll[VIRTIO_CONSOLE_FD].revents = 0;
 783                err = poll(console_poll, MAX_CONSOLE_FD, -1);
 784                if (err < 0) {
 785                        mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
 786                                strerror(errno));
 787                        continue;
 788                }
 789                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
 790                        wait_for_card_driver(mic,
 791                                             mic->mic_console.virtio_console_fd,
 792                                VIRTIO_ID_CONSOLE);
 793
 794                if (console_poll[MONITOR_FD].revents & POLLIN) {
 795                        copy.iov = iov0;
 796                        len = readv(pty_fd, copy.iov, copy.iovcnt);
 797                        if (len > 0) {
 798#ifdef DEBUG
 799                                disp_iovec(mic, copy, __func__, __LINE__);
 800                                mpsslog("%s %s %d read from tap 0x%lx\n",
 801                                        mic->name, __func__, __LINE__,
 802                                        len);
 803#endif
 804                                spin_for_descriptors(mic, &tx_vr);
 805                                txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
 806                                             &copy, len);
 807
 808                                err = mic_virtio_copy(mic,
 809                                        mic->mic_console.virtio_console_fd,
 810                                        &tx_vr, &copy);
 811                                if (err < 0) {
 812                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 813                                                mic->name, __func__, __LINE__,
 814                                                strerror(errno));
 815                                }
 816                                if (!err)
 817                                        verify_out_len(mic, &copy);
 818#ifdef DEBUG
 819                                disp_iovec(mic, copy, __func__, __LINE__);
 820                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 821                                        mic->name, __func__, __LINE__,
 822                                        sum_iovec_len(copy));
 823#endif
 824                                /* Reinitialize IOV for next run */
 825                                iov0->iov_len = PAGE_SIZE;
 826                        } else if (len < 0) {
 827                                disp_iovec(mic, &copy, __func__, __LINE__);
 828                                mpsslog("%s %s %d read failed %s ",
 829                                        mic->name, __func__, __LINE__,
 830                                        strerror(errno));
 831                                mpsslog("cnt %d sum %zd\n",
 832                                        copy.iovcnt, sum_iovec_len(&copy));
 833                        }
 834                }
 835
 836                if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
 837                        while (rx_vr.info->avail_idx !=
 838                                le16toh(rx_vr.vr.avail->idx)) {
 839                                copy.iov = iov1;
 840                                txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
 841                                             &copy, PAGE_SIZE);
 842
 843                                err = mic_virtio_copy(mic,
 844                                        mic->mic_console.virtio_console_fd,
 845                                        &rx_vr, &copy);
 846                                if (!err) {
 847                                        /* Set the correct output iov_len */
 848                                        iov1->iov_len = copy.out_len;
 849                                        verify_out_len(mic, &copy);
 850#ifdef DEBUG
 851                                        disp_iovec(mic, copy, __func__,
 852                                                   __LINE__);
 853                                        mpsslog("%s %s %d ",
 854                                                mic->name, __func__, __LINE__);
 855                                        mpsslog("read from net 0x%lx\n",
 856                                                sum_iovec_len(copy));
 857#endif
 858                                        len = writev(pty_fd,
 859                                                copy.iov, copy.iovcnt);
 860                                        if (len != sum_iovec_len(&copy)) {
 861                                                mpsslog("Tun write failed %s ",
 862                                                        strerror(errno));
 863                                                mpsslog("len 0x%zx ", len);
 864                                                mpsslog("read_len 0x%zx\n",
 865                                                        sum_iovec_len(&copy));
 866                                        } else {
 867#ifdef DEBUG
 868                                                disp_iovec(mic, copy, __func__,
 869                                                           __LINE__);
 870                                                mpsslog("%s %s %d ",
 871                                                        mic->name, __func__,
 872                                                        __LINE__);
 873                                                mpsslog("wrote to tap 0x%lx\n",
 874                                                        len);
 875#endif
 876                                        }
 877                                } else {
 878                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 879                                                mic->name, __func__, __LINE__,
 880                                                strerror(errno));
 881                                        break;
 882                                }
 883                        }
 884                }
 885                if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 886                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 887        }
 888_close_pty:
 889        close(pty_fd);
 890_return:
 891        pthread_exit(NULL);
 892}
 893
 894static void
 895add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
 896{
 897        char path[PATH_MAX];
 898        int fd, err;
 899
 900        snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
 901        fd = open(path, O_RDWR);
 902        if (fd < 0) {
 903                mpsslog("Could not open %s %s\n", path, strerror(errno));
 904                return;
 905        }
 906
 907        err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
 908        if (err < 0) {
 909                mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
 910                close(fd);
 911                return;
 912        }
 913        switch (dd->type) {
 914        case VIRTIO_ID_NET:
 915                mic->mic_net.virtio_net_fd = fd;
 916                mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
 917                break;
 918        case VIRTIO_ID_CONSOLE:
 919                mic->mic_console.virtio_console_fd = fd;
 920                mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
 921                break;
 922        case VIRTIO_ID_BLOCK:
 923                mic->mic_virtblk.virtio_block_fd = fd;
 924                mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
 925                break;
 926        }
 927}
 928
 929static bool
 930set_backend_file(struct mic_info *mic)
 931{
 932        FILE *config;
 933        char buff[PATH_MAX], *line, *evv, *p;
 934
 935        snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
 936        config = fopen(buff, "r");
 937        if (config == NULL)
 938                return false;
 939        do {  /* look for "virtblk_backend=XXXX" */
 940                line = fgets(buff, PATH_MAX, config);
 941                if (line == NULL)
 942                        break;
 943                if (*line == '#')
 944                        continue;
 945                p = strchr(line, '\n');
 946                if (p)
 947                        *p = '\0';
 948        } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
 949        fclose(config);
 950        if (line == NULL)
 951                return false;
 952        evv = strchr(line, '=');
 953        if (evv == NULL)
 954                return false;
 955        mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
 956        if (mic->mic_virtblk.backend_file == NULL) {
 957                mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
 958                return false;
 959        }
 960        strcpy(mic->mic_virtblk.backend_file, evv + 1);
 961        return true;
 962}
 963
 964#define SECTOR_SIZE 512
 965static bool
 966set_backend_size(struct mic_info *mic)
 967{
 968        mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
 969                SEEK_END);
 970        if (mic->mic_virtblk.backend_size < 0) {
 971                mpsslog("%s: can't seek: %s\n",
 972                        mic->name, mic->mic_virtblk.backend_file);
 973                return false;
 974        }
 975        virtblk_dev_page.blk_config.capacity =
 976                mic->mic_virtblk.backend_size / SECTOR_SIZE;
 977        if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
 978                virtblk_dev_page.blk_config.capacity++;
 979
 980        virtblk_dev_page.blk_config.capacity =
 981                htole64(virtblk_dev_page.blk_config.capacity);
 982
 983        return true;
 984}
 985
 986static bool
 987open_backend(struct mic_info *mic)
 988{
 989        if (!set_backend_file(mic))
 990                goto _error_exit;
 991        mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
 992        if (mic->mic_virtblk.backend < 0) {
 993                mpsslog("%s: can't open: %s\n", mic->name,
 994                        mic->mic_virtblk.backend_file);
 995                goto _error_free;
 996        }
 997        if (!set_backend_size(mic))
 998                goto _error_close;
 999        mic->mic_virtblk.backend_addr = mmap(NULL,
1000                mic->mic_virtblk.backend_size,
1001                PROT_READ|PROT_WRITE, MAP_SHARED,
1002                mic->mic_virtblk.backend, 0L);
1003        if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1004                mpsslog("%s: can't map: %s %s\n",
1005                        mic->name, mic->mic_virtblk.backend_file,
1006                        strerror(errno));
1007                goto _error_close;
1008        }
1009        return true;
1010
1011 _error_close:
1012        close(mic->mic_virtblk.backend);
1013 _error_free:
1014        free(mic->mic_virtblk.backend_file);
1015 _error_exit:
1016        return false;
1017}
1018
1019static void
1020close_backend(struct mic_info *mic)
1021{
1022        munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1023        close(mic->mic_virtblk.backend);
1024        free(mic->mic_virtblk.backend_file);
1025}
1026
1027static bool
1028start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1029{
1030        if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1031                mpsslog("%s: blk_config is not 8 byte aligned.\n",
1032                        mic->name);
1033                return false;
1034        }
1035        add_virtio_device(mic, &virtblk_dev_page.dd);
1036        if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1037                                  VIRTIO_ID_BLOCK, vring, NULL,
1038                                  virtblk_dev_page.dd.num_vq)) {
1039                mpsslog("%s init_vr failed %s\n",
1040                        mic->name, strerror(errno));
1041                return false;
1042        }
1043        return true;
1044}
1045
1046static void
1047stop_virtblk(struct mic_info *mic)
1048{
1049        int vr_size, ret;
1050
1051        vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1052                MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1053        ret = munmap(mic->mic_virtblk.block_dp,
1054                MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1055        if (ret < 0)
1056                mpsslog("%s munmap errno %d\n", mic->name, errno);
1057        close(mic->mic_virtblk.virtio_block_fd);
1058}
1059
1060static __u8
1061header_error_check(struct vring_desc *desc)
1062{
1063        if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1064                mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1065                        __func__, __LINE__);
1066                return -EIO;
1067        }
1068        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1069                mpsslog("%s() %d: alone\n",
1070                        __func__, __LINE__);
1071                return -EIO;
1072        }
1073        if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1074                mpsslog("%s() %d: not read\n",
1075                        __func__, __LINE__);
1076                return -EIO;
1077        }
1078        return 0;
1079}
1080
1081static int
1082read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1083{
1084        struct iovec iovec;
1085        struct mic_copy_desc copy;
1086
1087        iovec.iov_len = sizeof(*hdr);
1088        iovec.iov_base = hdr;
1089        copy.iov = &iovec;
1090        copy.iovcnt = 1;
1091        copy.vr_idx = 0;  /* only one vring on virtio_block */
1092        copy.update_used = false;  /* do not update used index */
1093        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1094}
1095
1096static int
1097transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1098{
1099        struct mic_copy_desc copy;
1100
1101        copy.iov = iovec;
1102        copy.iovcnt = iovcnt;
1103        copy.vr_idx = 0;  /* only one vring on virtio_block */
1104        copy.update_used = false;  /* do not update used index */
1105        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1106}
1107
1108static __u8
1109status_error_check(struct vring_desc *desc)
1110{
1111        if (le32toh(desc->len) != sizeof(__u8)) {
1112                mpsslog("%s() %d: length is not sizeof(status)\n",
1113                        __func__, __LINE__);
1114                return -EIO;
1115        }
1116        return 0;
1117}
1118
1119static int
1120write_status(int fd, __u8 *status)
1121{
1122        struct iovec iovec;
1123        struct mic_copy_desc copy;
1124
1125        iovec.iov_base = status;
1126        iovec.iov_len = sizeof(*status);
1127        copy.iov = &iovec;
1128        copy.iovcnt = 1;
1129        copy.vr_idx = 0;  /* only one vring on virtio_block */
1130        copy.update_used = true; /* Update used index */
1131        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1132}
1133
1134static void *
1135virtio_block(void *arg)
1136{
1137        struct mic_info *mic = (struct mic_info *)arg;
1138        int ret;
1139        struct pollfd block_poll;
1140        struct mic_vring vring;
1141        __u16 avail_idx;
1142        __u32 desc_idx;
1143        struct vring_desc *desc;
1144        struct iovec *iovec, *piov;
1145        __u8 status;
1146        __u32 buffer_desc_idx;
1147        struct virtio_blk_outhdr hdr;
1148        void *fos;
1149
1150        for (;;) {  /* forever */
1151                if (!open_backend(mic)) { /* No virtblk */
1152                        for (mic->mic_virtblk.signaled = 0;
1153                                !mic->mic_virtblk.signaled;)
1154                                sleep(1);
1155                        continue;
1156                }
1157
1158                /* backend file is specified. */
1159                if (!start_virtblk(mic, &vring))
1160                        goto _close_backend;
1161                iovec = malloc(sizeof(*iovec) *
1162                        le32toh(virtblk_dev_page.blk_config.seg_max));
1163                if (!iovec) {
1164                        mpsslog("%s: can't alloc iovec: %s\n",
1165                                mic->name, strerror(ENOMEM));
1166                        goto _stop_virtblk;
1167                }
1168
1169                block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1170                block_poll.events = POLLIN;
1171                for (mic->mic_virtblk.signaled = 0;
1172                     !mic->mic_virtblk.signaled;) {
1173                        block_poll.revents = 0;
1174                                        /* timeout in 1 sec to see signaled */
1175                        ret = poll(&block_poll, 1, 1000);
1176                        if (ret < 0) {
1177                                mpsslog("%s %d: poll failed: %s\n",
1178                                        __func__, __LINE__,
1179                                        strerror(errno));
1180                                continue;
1181                        }
1182
1183                        if (!(block_poll.revents & POLLIN)) {
1184#ifdef DEBUG
1185                                mpsslog("%s %d: block_poll.revents=0x%x\n",
1186                                        __func__, __LINE__, block_poll.revents);
1187#endif
1188                                continue;
1189                        }
1190
1191                        /* POLLIN */
1192                        while (vring.info->avail_idx !=
1193                                le16toh(vring.vr.avail->idx)) {
1194                                /* read header element */
1195                                avail_idx =
1196                                        vring.info->avail_idx &
1197                                        (vring.vr.num - 1);
1198                                desc_idx = le16toh(
1199                                        vring.vr.avail->ring[avail_idx]);
1200                                desc = &vring.vr.desc[desc_idx];
1201#ifdef DEBUG
1202                                mpsslog("%s() %d: avail_idx=%d ",
1203                                        __func__, __LINE__,
1204                                        vring.info->avail_idx);
1205                                mpsslog("vring.vr.num=%d desc=%p\n",
1206                                        vring.vr.num, desc);
1207#endif
1208                                status = header_error_check(desc);
1209                                ret = read_header(
1210                                        mic->mic_virtblk.virtio_block_fd,
1211                                        &hdr, desc_idx);
1212                                if (ret < 0) {
1213                                        mpsslog("%s() %d %s: ret=%d %s\n",
1214                                                __func__, __LINE__,
1215                                                mic->name, ret,
1216                                                strerror(errno));
1217                                        break;
1218                                }
1219                                /* buffer element */
1220                                piov = iovec;
1221                                status = 0;
1222                                fos = mic->mic_virtblk.backend_addr +
1223                                        (hdr.sector * SECTOR_SIZE);
1224                                buffer_desc_idx = next_desc(desc);
1225                                desc_idx = buffer_desc_idx;
1226                                for (desc = &vring.vr.desc[buffer_desc_idx];
1227                                     desc->flags & VRING_DESC_F_NEXT;
1228                                     desc_idx = next_desc(desc),
1229                                             desc = &vring.vr.desc[desc_idx]) {
1230                                        piov->iov_len = desc->len;
1231                                        piov->iov_base = fos;
1232                                        piov++;
1233                                        fos += desc->len;
1234                                }
1235                                /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1236                                if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1237                                        VIRTIO_BLK_T_GET_ID)) {
1238                                        /*
1239                                          VIRTIO_BLK_T_IN - does not do
1240                                          anything. Probably for documenting.
1241                                          VIRTIO_BLK_T_SCSI_CMD - for
1242                                          virtio_scsi.
1243                                          VIRTIO_BLK_T_FLUSH - turned off in
1244                                          config space.
1245                                          VIRTIO_BLK_T_BARRIER - defined but not
1246                                          used in anywhere.
1247                                        */
1248                                        mpsslog("%s() %d: type %x ",
1249                                                __func__, __LINE__,
1250                                                hdr.type);
1251                                        mpsslog("is not supported\n");
1252                                        status = -ENOTSUP;
1253
1254                                } else {
1255                                        ret = transfer_blocks(
1256                                        mic->mic_virtblk.virtio_block_fd,
1257                                                iovec,
1258                                                piov - iovec);
1259                                        if (ret < 0 &&
1260                                            status != 0)
1261                                                status = ret;
1262                                }
1263                                /* write status and update used pointer */
1264                                if (status != 0)
1265                                        status = status_error_check(desc);
1266                                ret = write_status(
1267                                        mic->mic_virtblk.virtio_block_fd,
1268                                        &status);
1269#ifdef DEBUG
1270                                mpsslog("%s() %d: write status=%d on desc=%p\n",
1271                                        __func__, __LINE__,
1272                                        status, desc);
1273#endif
1274                        }
1275                }
1276                free(iovec);
1277_stop_virtblk:
1278                stop_virtblk(mic);
1279_close_backend:
1280                close_backend(mic);
1281        }  /* forever */
1282
1283        pthread_exit(NULL);
1284}
1285
1286static void
1287reset(struct mic_info *mic)
1288{
1289#define RESET_TIMEOUT 120
1290        int i = RESET_TIMEOUT;
1291        setsysfs(mic->name, "state", "reset");
1292        while (i) {
1293                char *state;
1294                state = readsysfs(mic->name, "state");
1295                if (!state)
1296                        goto retry;
1297                mpsslog("%s: %s %d state %s\n",
1298                        mic->name, __func__, __LINE__, state);
1299
1300                /*
1301                 * If the shutdown was initiated by OSPM, the state stays
1302                 * in "suspended" which is also a valid condition for reset.
1303                 */
1304                if ((!strcmp(state, "offline")) ||
1305                    (!strcmp(state, "suspended"))) {
1306                        free(state);
1307                        break;
1308                }
1309                free(state);
1310retry:
1311                sleep(1);
1312                i--;
1313        }
1314}
1315
1316static int
1317get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1318{
1319        if (!strcmp(shutdown_status, "nop"))
1320                return MIC_NOP;
1321        if (!strcmp(shutdown_status, "crashed"))
1322                return MIC_CRASHED;
1323        if (!strcmp(shutdown_status, "halted"))
1324                return MIC_HALTED;
1325        if (!strcmp(shutdown_status, "poweroff"))
1326                return MIC_POWER_OFF;
1327        if (!strcmp(shutdown_status, "restart"))
1328                return MIC_RESTART;
1329        mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1330        /* Invalid state */
1331        assert(0);
1332};
1333
1334static int get_mic_state(struct mic_info *mic, char *state)
1335{
1336        if (!strcmp(state, "offline"))
1337                return MIC_OFFLINE;
1338        if (!strcmp(state, "online"))
1339                return MIC_ONLINE;
1340        if (!strcmp(state, "shutting_down"))
1341                return MIC_SHUTTING_DOWN;
1342        if (!strcmp(state, "reset_failed"))
1343                return MIC_RESET_FAILED;
1344        if (!strcmp(state, "suspending"))
1345                return MIC_SUSPENDING;
1346        if (!strcmp(state, "suspended"))
1347                return MIC_SUSPENDED;
1348        mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1349        /* Invalid state */
1350        assert(0);
1351};
1352
1353static void mic_handle_shutdown(struct mic_info *mic)
1354{
1355#define SHUTDOWN_TIMEOUT 60
1356        int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1357        char *shutdown_status;
1358        while (i) {
1359                shutdown_status = readsysfs(mic->name, "shutdown_status");
1360                if (!shutdown_status)
1361                        continue;
1362                mpsslog("%s: %s %d shutdown_status %s\n",
1363                        mic->name, __func__, __LINE__, shutdown_status);
1364                switch (get_mic_shutdown_status(mic, shutdown_status)) {
1365                case MIC_RESTART:
1366                        mic->restart = 1;
1367                case MIC_HALTED:
1368                case MIC_POWER_OFF:
1369                case MIC_CRASHED:
1370                        free(shutdown_status);
1371                        goto reset;
1372                default:
1373                        break;
1374                }
1375                free(shutdown_status);
1376                sleep(1);
1377                i--;
1378        }
1379reset:
1380        ret = kill(mic->pid, SIGTERM);
1381        mpsslog("%s: %s %d kill pid %d ret %d\n",
1382                mic->name, __func__, __LINE__,
1383                mic->pid, ret);
1384        if (!ret) {
1385                ret = waitpid(mic->pid, &stat,
1386                        WIFSIGNALED(stat));
1387                mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1388                        mic->name, __func__, __LINE__,
1389                        ret, mic->pid);
1390        }
1391        if (ret == mic->pid)
1392                reset(mic);
1393}
1394
1395static void *
1396mic_config(void *arg)
1397{
1398        struct mic_info *mic = (struct mic_info *)arg;
1399        char *state = NULL;
1400        char pathname[PATH_MAX];
1401        int fd, ret;
1402        struct pollfd ufds[1];
1403        char value[4096];
1404
1405        snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1406                 MICSYSFSDIR, mic->name, "state");
1407
1408        fd = open(pathname, O_RDONLY);
1409        if (fd < 0) {
1410                mpsslog("%s: opening file %s failed %s\n",
1411                        mic->name, pathname, strerror(errno));
1412                goto error;
1413        }
1414
1415        do {
1416                ret = lseek(fd, 0, SEEK_SET);
1417                if (ret < 0) {
1418                        mpsslog("%s: Failed to seek to file start '%s': %s\n",
1419                                mic->name, pathname, strerror(errno));
1420                        goto close_error1;
1421                }
1422                ret = read(fd, value, sizeof(value));
1423                if (ret < 0) {
1424                        mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1425                                mic->name, pathname, strerror(errno));
1426                        goto close_error1;
1427                }
1428retry:
1429                state = readsysfs(mic->name, "state");
1430                if (!state)
1431                        goto retry;
1432                mpsslog("%s: %s %d state %s\n",
1433                        mic->name, __func__, __LINE__, state);
1434                switch (get_mic_state(mic, state)) {
1435                case MIC_SHUTTING_DOWN:
1436                        mic_handle_shutdown(mic);
1437                        goto close_error;
1438                case MIC_SUSPENDING:
1439                        mic->boot_on_resume = 1;
1440                        setsysfs(mic->name, "state", "suspend");
1441                        mic_handle_shutdown(mic);
1442                        goto close_error;
1443                case MIC_OFFLINE:
1444                        if (mic->boot_on_resume) {
1445                                setsysfs(mic->name, "state", "boot");
1446                                mic->boot_on_resume = 0;
1447                        }
1448                        break;
1449                default:
1450                        break;
1451                }
1452                free(state);
1453
1454                ufds[0].fd = fd;
1455                ufds[0].events = POLLERR | POLLPRI;
1456                ret = poll(ufds, 1, -1);
1457                if (ret < 0) {
1458                        mpsslog("%s: poll failed %s\n",
1459                                mic->name, strerror(errno));
1460                        goto close_error1;
1461                }
1462        } while (1);
1463close_error:
1464        free(state);
1465close_error1:
1466        close(fd);
1467error:
1468        init_mic(mic);
1469        pthread_exit(NULL);
1470}
1471
1472static void
1473set_cmdline(struct mic_info *mic)
1474{
1475        char buffer[PATH_MAX];
1476        int len;
1477
1478        len = snprintf(buffer, PATH_MAX,
1479                "clocksource=tsc highres=off nohz=off ");
1480        len += snprintf(buffer + len, PATH_MAX - len,
1481                "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1482        len += snprintf(buffer + len, PATH_MAX - len,
1483                "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1484                mic->id);
1485
1486        setsysfs(mic->name, "cmdline", buffer);
1487        mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1488        snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1489        mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1490}
1491
1492static void
1493set_log_buf_info(struct mic_info *mic)
1494{
1495        int fd;
1496        off_t len;
1497        char system_map[] = "/lib/firmware/mic/System.map";
1498        char *map, *temp, log_buf[17] = {'\0'};
1499
1500        fd = open(system_map, O_RDONLY);
1501        if (fd < 0) {
1502                mpsslog("%s: Opening System.map failed: %d\n",
1503                        mic->name, errno);
1504                return;
1505        }
1506        len = lseek(fd, 0, SEEK_END);
1507        if (len < 0) {
1508                mpsslog("%s: Reading System.map size failed: %d\n",
1509                        mic->name, errno);
1510                close(fd);
1511                return;
1512        }
1513        map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1514        if (map == MAP_FAILED) {
1515                mpsslog("%s: mmap of System.map failed: %d\n",
1516                        mic->name, errno);
1517                close(fd);
1518                return;
1519        }
1520        temp = strstr(map, "__log_buf");
1521        if (!temp) {
1522                mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1523                munmap(map, len);
1524                close(fd);
1525                return;
1526        }
1527        strncpy(log_buf, temp - 19, 16);
1528        setsysfs(mic->name, "log_buf_addr", log_buf);
1529        mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1530        temp = strstr(map, "log_buf_len");
1531        if (!temp) {
1532                mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1533                munmap(map, len);
1534                close(fd);
1535                return;
1536        }
1537        strncpy(log_buf, temp - 19, 16);
1538        setsysfs(mic->name, "log_buf_len", log_buf);
1539        mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1540        munmap(map, len);
1541        close(fd);
1542}
1543
1544static void init_mic(struct mic_info *mic);
1545
1546static void
1547change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1548{
1549        struct mic_info *mic;
1550
1551        for (mic = mic_list.next; mic != NULL; mic = mic->next)
1552                mic->mic_virtblk.signaled = 1/* true */;
1553}
1554
1555static void
1556init_mic(struct mic_info *mic)
1557{
1558        struct sigaction ignore = {
1559                .sa_flags = 0,
1560                .sa_handler = SIG_IGN
1561        };
1562        struct sigaction act = {
1563                .sa_flags = SA_SIGINFO,
1564                .sa_sigaction = change_virtblk_backend,
1565        };
1566        char buffer[PATH_MAX];
1567        int err;
1568
1569        /*
1570         * Currently, one virtio block device is supported for each MIC card
1571         * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1572         * The signal informs the virtio block backend about a change in the
1573         * configuration file which specifies the virtio backend file name on
1574         * the host. Virtio block backend then re-reads the configuration file
1575         * and switches to the new block device. This signalling mechanism may
1576         * not be required once multiple virtio block devices are supported by
1577         * the MIC daemon.
1578         */
1579        sigaction(SIGUSR1, &ignore, NULL);
1580
1581        mic->pid = fork();
1582        switch (mic->pid) {
1583        case 0:
1584                set_log_buf_info(mic);
1585                set_cmdline(mic);
1586                add_virtio_device(mic, &virtcons_dev_page.dd);
1587                add_virtio_device(mic, &virtnet_dev_page.dd);
1588                err = pthread_create(&mic->mic_console.console_thread, NULL,
1589                        virtio_console, mic);
1590                if (err)
1591                        mpsslog("%s virtcons pthread_create failed %s\n",
1592                                mic->name, strerror(err));
1593                err = pthread_create(&mic->mic_net.net_thread, NULL,
1594                        virtio_net, mic);
1595                if (err)
1596                        mpsslog("%s virtnet pthread_create failed %s\n",
1597                                mic->name, strerror(err));
1598                err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1599                        virtio_block, mic);
1600                if (err)
1601                        mpsslog("%s virtblk pthread_create failed %s\n",
1602                                mic->name, strerror(err));
1603                sigemptyset(&act.sa_mask);
1604                err = sigaction(SIGUSR1, &act, NULL);
1605                if (err)
1606                        mpsslog("%s sigaction SIGUSR1 failed %s\n",
1607                                mic->name, strerror(errno));
1608                while (1)
1609                        sleep(60);
1610        case -1:
1611                mpsslog("fork failed MIC name %s id %d errno %d\n",
1612                        mic->name, mic->id, errno);
1613                break;
1614        default:
1615                if (mic->restart) {
1616                        snprintf(buffer, PATH_MAX, "boot");
1617                        setsysfs(mic->name, "state", buffer);
1618                        mpsslog("%s restarting mic %d\n",
1619                                mic->name, mic->restart);
1620                        mic->restart = 0;
1621                }
1622                pthread_create(&mic->config_thread, NULL, mic_config, mic);
1623        }
1624}
1625
1626static void
1627start_daemon(void)
1628{
1629        struct mic_info *mic;
1630
1631        for (mic = mic_list.next; mic != NULL; mic = mic->next)
1632                init_mic(mic);
1633
1634        while (1)
1635                sleep(60);
1636}
1637
1638static int
1639init_mic_list(void)
1640{
1641        struct mic_info *mic = &mic_list;
1642        struct dirent *file;
1643        DIR *dp;
1644        int cnt = 0;
1645
1646        dp = opendir(MICSYSFSDIR);
1647        if (!dp)
1648                return 0;
1649
1650        while ((file = readdir(dp)) != NULL) {
1651                if (!strncmp(file->d_name, "mic", 3)) {
1652                        mic->next = calloc(1, sizeof(struct mic_info));
1653                        if (mic->next) {
1654                                mic = mic->next;
1655                                mic->id = atoi(&file->d_name[3]);
1656                                mic->name = malloc(strlen(file->d_name) + 16);
1657                                if (mic->name)
1658                                        strcpy(mic->name, file->d_name);
1659                                mpsslog("MIC name %s id %d\n", mic->name,
1660                                        mic->id);
1661                                cnt++;
1662                        }
1663                }
1664        }
1665
1666        closedir(dp);
1667        return cnt;
1668}
1669
1670void
1671mpsslog(char *format, ...)
1672{
1673        va_list args;
1674        char buffer[4096];
1675        char ts[52], *ts1;
1676        time_t t;
1677
1678        if (logfp == NULL)
1679                return;
1680
1681        va_start(args, format);
1682        vsprintf(buffer, format, args);
1683        va_end(args);
1684
1685        time(&t);
1686        ts1 = ctime_r(&t, ts);
1687        ts1[strlen(ts1) - 1] = '\0';
1688        fprintf(logfp, "%s: %s", ts1, buffer);
1689
1690        fflush(logfp);
1691}
1692
1693int
1694main(int argc, char *argv[])
1695{
1696        int cnt;
1697        pid_t pid;
1698
1699        myname = argv[0];
1700
1701        logfp = fopen(LOGFILE_NAME, "a+");
1702        if (!logfp) {
1703                fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1704                exit(1);
1705        }
1706        pid = fork();
1707        switch (pid) {
1708        case 0:
1709                break;
1710        case -1:
1711                exit(2);
1712        default:
1713                exit(0);
1714        }
1715
1716        mpsslog("MIC Daemon start\n");
1717
1718        cnt = init_mic_list();
1719        if (cnt == 0) {
1720                mpsslog("MIC module not loaded\n");
1721                exit(3);
1722        }
1723        mpsslog("MIC found %d devices\n", cnt);
1724
1725        start_daemon();
1726
1727        exit(0);
1728}
1729