linux/Documentation/mic/mpssd/mpssd.c
<<
>>
Prefs
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2013 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * The full GNU General Public License is included in this distribution in
  16 * the file called "COPYING".
  17 *
  18 * Intel MIC User Space Tools.
  19 */
  20
  21#define _GNU_SOURCE
  22
  23#include <stdlib.h>
  24#include <fcntl.h>
  25#include <getopt.h>
  26#include <assert.h>
  27#include <unistd.h>
  28#include <stdbool.h>
  29#include <signal.h>
  30#include <poll.h>
  31#include <features.h>
  32#include <sys/types.h>
  33#include <sys/stat.h>
  34#include <sys/mman.h>
  35#include <sys/socket.h>
  36#include <linux/virtio_ring.h>
  37#include <linux/virtio_net.h>
  38#include <linux/virtio_console.h>
  39#include <linux/virtio_blk.h>
  40#include <linux/version.h>
  41#include "mpssd.h"
  42#include <linux/mic_ioctl.h>
  43#include <linux/mic_common.h>
  44
  45static void init_mic(struct mic_info *mic);
  46
  47static FILE *logfp;
  48static struct mic_info mic_list;
  49
  50#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  51
  52#define min_t(type, x, y) ({                            \
  53                type __min1 = (x);                      \
  54                type __min2 = (y);                      \
  55                __min1 < __min2 ? __min1 : __min2; })
  56
  57/* align addr on a size boundary - adjust address up/down if needed */
  58#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
  59#define _ALIGN_UP(addr, size)    _ALIGN_DOWN(addr + size - 1, size)
  60
  61/* align addr on a size boundary - adjust address up if needed */
  62#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
  63
  64/* to align the pointer to the (next) page boundary */
  65#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
  66
  67#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  68
  69#define GSO_ENABLED             1
  70#define MAX_GSO_SIZE            (64 * 1024)
  71#define ETH_H_LEN               14
  72#define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  73#define MIC_DEVICE_PAGE_END     0x1000
  74
  75#ifndef VIRTIO_NET_HDR_F_DATA_VALID
  76#define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
  77#endif
  78
  79static struct {
  80        struct mic_device_desc dd;
  81        struct mic_vqconfig vqconfig[2];
  82        __u32 host_features, guest_acknowledgements;
  83        struct virtio_console_config cons_config;
  84} virtcons_dev_page = {
  85        .dd = {
  86                .type = VIRTIO_ID_CONSOLE,
  87                .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  88                .feature_len = sizeof(virtcons_dev_page.host_features),
  89                .config_len = sizeof(virtcons_dev_page.cons_config),
  90        },
  91        .vqconfig[0] = {
  92                .num = htole16(MIC_VRING_ENTRIES),
  93        },
  94        .vqconfig[1] = {
  95                .num = htole16(MIC_VRING_ENTRIES),
  96        },
  97};
  98
  99static struct {
 100        struct mic_device_desc dd;
 101        struct mic_vqconfig vqconfig[2];
 102        __u32 host_features, guest_acknowledgements;
 103        struct virtio_net_config net_config;
 104} virtnet_dev_page = {
 105        .dd = {
 106                .type = VIRTIO_ID_NET,
 107                .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
 108                .feature_len = sizeof(virtnet_dev_page.host_features),
 109                .config_len = sizeof(virtnet_dev_page.net_config),
 110        },
 111        .vqconfig[0] = {
 112                .num = htole16(MIC_VRING_ENTRIES),
 113        },
 114        .vqconfig[1] = {
 115                .num = htole16(MIC_VRING_ENTRIES),
 116        },
 117#if GSO_ENABLED
 118                .host_features = htole32(
 119                1 << VIRTIO_NET_F_CSUM |
 120                1 << VIRTIO_NET_F_GSO |
 121                1 << VIRTIO_NET_F_GUEST_TSO4 |
 122                1 << VIRTIO_NET_F_GUEST_TSO6 |
 123                1 << VIRTIO_NET_F_GUEST_ECN |
 124                1 << VIRTIO_NET_F_GUEST_UFO),
 125#else
 126                .host_features = 0,
 127#endif
 128};
 129
 130static const char *mic_config_dir = "/etc/sysconfig/mic";
 131static const char *virtblk_backend = "VIRTBLK_BACKEND";
 132static struct {
 133        struct mic_device_desc dd;
 134        struct mic_vqconfig vqconfig[1];
 135        __u32 host_features, guest_acknowledgements;
 136        struct virtio_blk_config blk_config;
 137} virtblk_dev_page = {
 138        .dd = {
 139                .type = VIRTIO_ID_BLOCK,
 140                .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
 141                .feature_len = sizeof(virtblk_dev_page.host_features),
 142                .config_len = sizeof(virtblk_dev_page.blk_config),
 143        },
 144        .vqconfig[0] = {
 145                .num = htole16(MIC_VRING_ENTRIES),
 146        },
 147        .host_features =
 148                htole32(1<<VIRTIO_BLK_F_SEG_MAX),
 149        .blk_config = {
 150                .seg_max = htole32(MIC_VRING_ENTRIES - 2),
 151                .capacity = htole64(0),
 152         }
 153};
 154
 155static char *myname;
 156
 157static int
 158tap_configure(struct mic_info *mic, char *dev)
 159{
 160        pid_t pid;
 161        char *ifargv[7];
 162        char ipaddr[IFNAMSIZ];
 163        int ret = 0;
 164
 165        pid = fork();
 166        if (pid == 0) {
 167                ifargv[0] = "ip";
 168                ifargv[1] = "link";
 169                ifargv[2] = "set";
 170                ifargv[3] = dev;
 171                ifargv[4] = "up";
 172                ifargv[5] = NULL;
 173                mpsslog("Configuring %s\n", dev);
 174                ret = execvp("ip", ifargv);
 175                if (ret < 0) {
 176                        mpsslog("%s execvp failed errno %s\n",
 177                                mic->name, strerror(errno));
 178                        return ret;
 179                }
 180        }
 181        if (pid < 0) {
 182                mpsslog("%s fork failed errno %s\n",
 183                        mic->name, strerror(errno));
 184                return ret;
 185        }
 186
 187        ret = waitpid(pid, NULL, 0);
 188        if (ret < 0) {
 189                mpsslog("%s waitpid failed errno %s\n",
 190                        mic->name, strerror(errno));
 191                return ret;
 192        }
 193
 194        snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
 195
 196        pid = fork();
 197        if (pid == 0) {
 198                ifargv[0] = "ip";
 199                ifargv[1] = "addr";
 200                ifargv[2] = "add";
 201                ifargv[3] = ipaddr;
 202                ifargv[4] = "dev";
 203                ifargv[5] = dev;
 204                ifargv[6] = NULL;
 205                mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
 206                ret = execvp("ip", ifargv);
 207                if (ret < 0) {
 208                        mpsslog("%s execvp failed errno %s\n",
 209                                mic->name, strerror(errno));
 210                        return ret;
 211                }
 212        }
 213        if (pid < 0) {
 214                mpsslog("%s fork failed errno %s\n",
 215                        mic->name, strerror(errno));
 216                return ret;
 217        }
 218
 219        ret = waitpid(pid, NULL, 0);
 220        if (ret < 0) {
 221                mpsslog("%s waitpid failed errno %s\n",
 222                        mic->name, strerror(errno));
 223                return ret;
 224        }
 225        mpsslog("MIC name %s %s %d DONE!\n",
 226                mic->name, __func__, __LINE__);
 227        return 0;
 228}
 229
 230static int tun_alloc(struct mic_info *mic, char *dev)
 231{
 232        struct ifreq ifr;
 233        int fd, err;
 234#if GSO_ENABLED
 235        unsigned offload;
 236#endif
 237        fd = open("/dev/net/tun", O_RDWR);
 238        if (fd < 0) {
 239                mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
 240                goto done;
 241        }
 242
 243        memset(&ifr, 0, sizeof(ifr));
 244
 245        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 246        if (*dev)
 247                strncpy(ifr.ifr_name, dev, IFNAMSIZ);
 248
 249        err = ioctl(fd, TUNSETIFF, (void *)&ifr);
 250        if (err < 0) {
 251                mpsslog("%s %s %d TUNSETIFF failed %s\n",
 252                        mic->name, __func__, __LINE__, strerror(errno));
 253                close(fd);
 254                return err;
 255        }
 256#if GSO_ENABLED
 257        offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
 258                TUN_F_TSO_ECN | TUN_F_UFO;
 259
 260        err = ioctl(fd, TUNSETOFFLOAD, offload);
 261        if (err < 0) {
 262                mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
 263                        mic->name, __func__, __LINE__, strerror(errno));
 264                close(fd);
 265                return err;
 266        }
 267#endif
 268        strcpy(dev, ifr.ifr_name);
 269        mpsslog("Created TAP %s\n", dev);
 270done:
 271        return fd;
 272}
 273
 274#define NET_FD_VIRTIO_NET 0
 275#define NET_FD_TUN 1
 276#define MAX_NET_FD 2
 277
 278static void set_dp(struct mic_info *mic, int type, void *dp)
 279{
 280        switch (type) {
 281        case VIRTIO_ID_CONSOLE:
 282                mic->mic_console.console_dp = dp;
 283                return;
 284        case VIRTIO_ID_NET:
 285                mic->mic_net.net_dp = dp;
 286                return;
 287        case VIRTIO_ID_BLOCK:
 288                mic->mic_virtblk.block_dp = dp;
 289                return;
 290        }
 291        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 292        assert(0);
 293}
 294
 295static void *get_dp(struct mic_info *mic, int type)
 296{
 297        switch (type) {
 298        case VIRTIO_ID_CONSOLE:
 299                return mic->mic_console.console_dp;
 300        case VIRTIO_ID_NET:
 301                return mic->mic_net.net_dp;
 302        case VIRTIO_ID_BLOCK:
 303                return mic->mic_virtblk.block_dp;
 304        }
 305        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 306        assert(0);
 307        return NULL;
 308}
 309
 310static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
 311{
 312        struct mic_device_desc *d;
 313        int i;
 314        void *dp = get_dp(mic, type);
 315
 316        for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
 317                i += mic_total_desc_size(d)) {
 318                d = dp + i;
 319
 320                /* End of list */
 321                if (d->type == 0)
 322                        break;
 323
 324                if (d->type == -1)
 325                        continue;
 326
 327                mpsslog("%s %s d-> type %d d %p\n",
 328                        mic->name, __func__, d->type, d);
 329
 330                if (d->type == (__u8)type)
 331                        return d;
 332        }
 333        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 334        assert(0);
 335        return NULL;
 336}
 337
 338/* See comments in vhost.c for explanation of next_desc() */
 339static unsigned next_desc(struct vring_desc *desc)
 340{
 341        unsigned int next;
 342
 343        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
 344                return -1U;
 345        next = le16toh(desc->next);
 346        return next;
 347}
 348
 349/* Sum up all the IOVEC length */
 350static ssize_t
 351sum_iovec_len(struct mic_copy_desc *copy)
 352{
 353        ssize_t sum = 0;
 354        int i;
 355
 356        for (i = 0; i < copy->iovcnt; i++)
 357                sum += copy->iov[i].iov_len;
 358        return sum;
 359}
 360
 361static inline void verify_out_len(struct mic_info *mic,
 362        struct mic_copy_desc *copy)
 363{
 364        if (copy->out_len != sum_iovec_len(copy)) {
 365                mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
 366                        mic->name, __func__, __LINE__,
 367                        copy->out_len, sum_iovec_len(copy));
 368                assert(copy->out_len == sum_iovec_len(copy));
 369        }
 370}
 371
 372/* Display an iovec */
 373static void
 374disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
 375           const char *s, int line)
 376{
 377        int i;
 378
 379        for (i = 0; i < copy->iovcnt; i++)
 380                mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
 381                        mic->name, s, line, i,
 382                        copy->iov[i].iov_base, copy->iov[i].iov_len);
 383}
 384
 385static inline __u16 read_avail_idx(struct mic_vring *vr)
 386{
 387        return ACCESS_ONCE(vr->info->avail_idx);
 388}
 389
 390static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
 391                                struct mic_copy_desc *copy, ssize_t len)
 392{
 393        copy->vr_idx = tx ? 0 : 1;
 394        copy->update_used = true;
 395        if (type == VIRTIO_ID_NET)
 396                copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
 397        else
 398                copy->iov[0].iov_len = len;
 399}
 400
 401/* Central API which triggers the copies */
 402static int
 403mic_virtio_copy(struct mic_info *mic, int fd,
 404                struct mic_vring *vr, struct mic_copy_desc *copy)
 405{
 406        int ret;
 407
 408        ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
 409        if (ret) {
 410                mpsslog("%s %s %d errno %s ret %d\n",
 411                        mic->name, __func__, __LINE__,
 412                        strerror(errno), ret);
 413        }
 414        return ret;
 415}
 416
 417/*
 418 * This initialization routine requires at least one
 419 * vring i.e. vr0. vr1 is optional.
 420 */
 421static void *
 422init_vr(struct mic_info *mic, int fd, int type,
 423        struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
 424{
 425        int vr_size;
 426        char *va;
 427
 428        vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
 429                MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
 430        va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
 431                PROT_READ, MAP_SHARED, fd, 0);
 432        if (MAP_FAILED == va) {
 433                mpsslog("%s %s %d mmap failed errno %s\n",
 434                        mic->name, __func__, __LINE__,
 435                        strerror(errno));
 436                goto done;
 437        }
 438        set_dp(mic, type, va);
 439        vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
 440        vr0->info = vr0->va +
 441                vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
 442        vring_init(&vr0->vr,
 443                   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
 444        mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
 445                __func__, mic->name, vr0->va, vr0->info, vr_size,
 446                vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 447        mpsslog("magic 0x%x expected 0x%x\n",
 448                le32toh(vr0->info->magic), MIC_MAGIC + type);
 449        assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
 450        if (vr1) {
 451                vr1->va = (struct mic_vring *)
 452                        &va[MIC_DEVICE_PAGE_END + vr_size];
 453                vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
 454                        MIC_VIRTIO_RING_ALIGN);
 455                vring_init(&vr1->vr,
 456                           MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
 457                mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
 458                        __func__, mic->name, vr1->va, vr1->info, vr_size,
 459                        vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 460                mpsslog("magic 0x%x expected 0x%x\n",
 461                        le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
 462                assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
 463        }
 464done:
 465        return va;
 466}
 467
 468static void
 469wait_for_card_driver(struct mic_info *mic, int fd, int type)
 470{
 471        struct pollfd pollfd;
 472        int err;
 473        struct mic_device_desc *desc = get_device_desc(mic, type);
 474
 475        pollfd.fd = fd;
 476        mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
 477                mic->name, __func__, type, desc->status);
 478        while (1) {
 479                pollfd.events = POLLIN;
 480                pollfd.revents = 0;
 481                err = poll(&pollfd, 1, -1);
 482                if (err < 0) {
 483                        mpsslog("%s %s poll failed %s\n",
 484                                mic->name, __func__, strerror(errno));
 485                        continue;
 486                }
 487
 488                if (pollfd.revents) {
 489                        mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
 490                                mic->name, __func__, type, desc->status);
 491                        if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
 492                                mpsslog("%s %s poll.revents %d\n",
 493                                        mic->name, __func__, pollfd.revents);
 494                                mpsslog("%s %s desc-> type %d status 0x%x\n",
 495                                        mic->name, __func__, type,
 496                                        desc->status);
 497                                break;
 498                        }
 499                }
 500        }
 501}
 502
 503/* Spin till we have some descriptors */
 504static void
 505spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
 506{
 507        __u16 avail_idx = read_avail_idx(vr);
 508
 509        while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
 510#ifdef DEBUG
 511                mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
 512                        mic->name, __func__,
 513                        le16toh(vr->vr.avail->idx), vr->info->avail_idx);
 514#endif
 515                sched_yield();
 516        }
 517}
 518
 519static void *
 520virtio_net(void *arg)
 521{
 522        static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
 523        static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
 524        struct iovec vnet_iov[2][2] = {
 525                { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
 526                  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
 527                { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
 528                  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
 529        };
 530        struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
 531        struct mic_info *mic = (struct mic_info *)arg;
 532        char if_name[IFNAMSIZ];
 533        struct pollfd net_poll[MAX_NET_FD];
 534        struct mic_vring tx_vr, rx_vr;
 535        struct mic_copy_desc copy;
 536        struct mic_device_desc *desc;
 537        int err;
 538
 539        snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
 540        mic->mic_net.tap_fd = tun_alloc(mic, if_name);
 541        if (mic->mic_net.tap_fd < 0)
 542                goto done;
 543
 544        if (tap_configure(mic, if_name))
 545                goto done;
 546        mpsslog("MIC name %s id %d\n", mic->name, mic->id);
 547
 548        net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
 549        net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
 550        net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
 551        net_poll[NET_FD_TUN].events = POLLIN;
 552
 553        if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
 554                                  VIRTIO_ID_NET, &tx_vr, &rx_vr,
 555                virtnet_dev_page.dd.num_vq)) {
 556                mpsslog("%s init_vr failed %s\n",
 557                        mic->name, strerror(errno));
 558                goto done;
 559        }
 560
 561        copy.iovcnt = 2;
 562        desc = get_device_desc(mic, VIRTIO_ID_NET);
 563
 564        while (1) {
 565                ssize_t len;
 566
 567                net_poll[NET_FD_VIRTIO_NET].revents = 0;
 568                net_poll[NET_FD_TUN].revents = 0;
 569
 570                /* Start polling for data from tap and virtio net */
 571                err = poll(net_poll, 2, -1);
 572                if (err < 0) {
 573                        mpsslog("%s poll failed %s\n",
 574                                __func__, strerror(errno));
 575                        continue;
 576                }
 577                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
 578                        wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
 579                                             VIRTIO_ID_NET);
 580                /*
 581                 * Check if there is data to be read from TUN and write to
 582                 * virtio net fd if there is.
 583                 */
 584                if (net_poll[NET_FD_TUN].revents & POLLIN) {
 585                        copy.iov = iov0;
 586                        len = readv(net_poll[NET_FD_TUN].fd,
 587                                copy.iov, copy.iovcnt);
 588                        if (len > 0) {
 589                                struct virtio_net_hdr *hdr
 590                                        = (struct virtio_net_hdr *)vnet_hdr[0];
 591
 592                                /* Disable checksums on the card since we are on
 593                                   a reliable PCIe link */
 594                                hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
 595#ifdef DEBUG
 596                                mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
 597                                        __func__, __LINE__, hdr->flags);
 598                                mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
 599                                        copy.out_len, hdr->gso_type);
 600#endif
 601#ifdef DEBUG
 602                                disp_iovec(mic, copy, __func__, __LINE__);
 603                                mpsslog("%s %s %d read from tap 0x%lx\n",
 604                                        mic->name, __func__, __LINE__,
 605                                        len);
 606#endif
 607                                spin_for_descriptors(mic, &tx_vr);
 608                                txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
 609                                             len);
 610
 611                                err = mic_virtio_copy(mic,
 612                                        mic->mic_net.virtio_net_fd, &tx_vr,
 613                                        &copy);
 614                                if (err < 0) {
 615                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 616                                                mic->name, __func__, __LINE__,
 617                                                strerror(errno));
 618                                }
 619                                if (!err)
 620                                        verify_out_len(mic, &copy);
 621#ifdef DEBUG
 622                                disp_iovec(mic, copy, __func__, __LINE__);
 623                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 624                                        mic->name, __func__, __LINE__,
 625                                        sum_iovec_len(&copy));
 626#endif
 627                                /* Reinitialize IOV for next run */
 628                                iov0[1].iov_len = MAX_NET_PKT_SIZE;
 629                        } else if (len < 0) {
 630                                disp_iovec(mic, &copy, __func__, __LINE__);
 631                                mpsslog("%s %s %d read failed %s ", mic->name,
 632                                        __func__, __LINE__, strerror(errno));
 633                                mpsslog("cnt %d sum %zd\n",
 634                                        copy.iovcnt, sum_iovec_len(&copy));
 635                        }
 636                }
 637
 638                /*
 639                 * Check if there is data to be read from virtio net and
 640                 * write to TUN if there is.
 641                 */
 642                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
 643                        while (rx_vr.info->avail_idx !=
 644                                le16toh(rx_vr.vr.avail->idx)) {
 645                                copy.iov = iov1;
 646                                txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
 647                                             MAX_NET_PKT_SIZE
 648                                        + sizeof(struct virtio_net_hdr));
 649
 650                                err = mic_virtio_copy(mic,
 651                                        mic->mic_net.virtio_net_fd, &rx_vr,
 652                                        &copy);
 653                                if (!err) {
 654#ifdef DEBUG
 655                                        struct virtio_net_hdr *hdr
 656                                                = (struct virtio_net_hdr *)
 657                                                        vnet_hdr[1];
 658
 659                                        mpsslog("%s %s %d hdr->flags 0x%x, ",
 660                                                mic->name, __func__, __LINE__,
 661                                                hdr->flags);
 662                                        mpsslog("out_len %d gso_type 0x%x\n",
 663                                                copy.out_len,
 664                                                hdr->gso_type);
 665#endif
 666                                        /* Set the correct output iov_len */
 667                                        iov1[1].iov_len = copy.out_len -
 668                                                sizeof(struct virtio_net_hdr);
 669                                        verify_out_len(mic, &copy);
 670#ifdef DEBUG
 671                                        disp_iovec(mic, copy, __func__,
 672                                                   __LINE__);
 673                                        mpsslog("%s %s %d ",
 674                                                mic->name, __func__, __LINE__);
 675                                        mpsslog("read from net 0x%lx\n",
 676                                                sum_iovec_len(copy));
 677#endif
 678                                        len = writev(net_poll[NET_FD_TUN].fd,
 679                                                copy.iov, copy.iovcnt);
 680                                        if (len != sum_iovec_len(&copy)) {
 681                                                mpsslog("Tun write failed %s ",
 682                                                        strerror(errno));
 683                                                mpsslog("len 0x%zx ", len);
 684                                                mpsslog("read_len 0x%zx\n",
 685                                                        sum_iovec_len(&copy));
 686                                        } else {
 687#ifdef DEBUG
 688                                                disp_iovec(mic, &copy, __func__,
 689                                                           __LINE__);
 690                                                mpsslog("%s %s %d ",
 691                                                        mic->name, __func__,
 692                                                        __LINE__);
 693                                                mpsslog("wrote to tap 0x%lx\n",
 694                                                        len);
 695#endif
 696                                        }
 697                                } else {
 698                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 699                                                mic->name, __func__, __LINE__,
 700                                                strerror(errno));
 701                                        break;
 702                                }
 703                        }
 704                }
 705                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 706                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 707        }
 708done:
 709        pthread_exit(NULL);
 710}
 711
 712/* virtio_console */
 713#define VIRTIO_CONSOLE_FD 0
 714#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
 715#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
 716#define MAX_BUFFER_SIZE PAGE_SIZE
 717
 718static void *
 719virtio_console(void *arg)
 720{
 721        static __u8 vcons_buf[2][PAGE_SIZE];
 722        struct iovec vcons_iov[2] = {
 723                { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
 724                { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
 725        };
 726        struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
 727        struct mic_info *mic = (struct mic_info *)arg;
 728        int err;
 729        struct pollfd console_poll[MAX_CONSOLE_FD];
 730        int pty_fd;
 731        char *pts_name;
 732        ssize_t len;
 733        struct mic_vring tx_vr, rx_vr;
 734        struct mic_copy_desc copy;
 735        struct mic_device_desc *desc;
 736
 737        pty_fd = posix_openpt(O_RDWR);
 738        if (pty_fd < 0) {
 739                mpsslog("can't open a pseudoterminal master device: %s\n",
 740                        strerror(errno));
 741                goto _return;
 742        }
 743        pts_name = ptsname(pty_fd);
 744        if (pts_name == NULL) {
 745                mpsslog("can't get pts name\n");
 746                goto _close_pty;
 747        }
 748        printf("%s console message goes to %s\n", mic->name, pts_name);
 749        mpsslog("%s console message goes to %s\n", mic->name, pts_name);
 750        err = grantpt(pty_fd);
 751        if (err < 0) {
 752                mpsslog("can't grant access: %s %s\n",
 753                        pts_name, strerror(errno));
 754                goto _close_pty;
 755        }
 756        err = unlockpt(pty_fd);
 757        if (err < 0) {
 758                mpsslog("can't unlock a pseudoterminal: %s %s\n",
 759                        pts_name, strerror(errno));
 760                goto _close_pty;
 761        }
 762        console_poll[MONITOR_FD].fd = pty_fd;
 763        console_poll[MONITOR_FD].events = POLLIN;
 764
 765        console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
 766        console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
 767
 768        if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
 769                                  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
 770                virtcons_dev_page.dd.num_vq)) {
 771                mpsslog("%s init_vr failed %s\n",
 772                        mic->name, strerror(errno));
 773                goto _close_pty;
 774        }
 775
 776        copy.iovcnt = 1;
 777        desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
 778
 779        for (;;) {
 780                console_poll[MONITOR_FD].revents = 0;
 781                console_poll[VIRTIO_CONSOLE_FD].revents = 0;
 782                err = poll(console_poll, MAX_CONSOLE_FD, -1);
 783                if (err < 0) {
 784                        mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
 785                                strerror(errno));
 786                        continue;
 787                }
 788                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
 789                        wait_for_card_driver(mic,
 790                                             mic->mic_console.virtio_console_fd,
 791                                VIRTIO_ID_CONSOLE);
 792
 793                if (console_poll[MONITOR_FD].revents & POLLIN) {
 794                        copy.iov = iov0;
 795                        len = readv(pty_fd, copy.iov, copy.iovcnt);
 796                        if (len > 0) {
 797#ifdef DEBUG
 798                                disp_iovec(mic, copy, __func__, __LINE__);
 799                                mpsslog("%s %s %d read from tap 0x%lx\n",
 800                                        mic->name, __func__, __LINE__,
 801                                        len);
 802#endif
 803                                spin_for_descriptors(mic, &tx_vr);
 804                                txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
 805                                             &copy, len);
 806
 807                                err = mic_virtio_copy(mic,
 808                                        mic->mic_console.virtio_console_fd,
 809                                        &tx_vr, &copy);
 810                                if (err < 0) {
 811                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 812                                                mic->name, __func__, __LINE__,
 813                                                strerror(errno));
 814                                }
 815                                if (!err)
 816                                        verify_out_len(mic, &copy);
 817#ifdef DEBUG
 818                                disp_iovec(mic, copy, __func__, __LINE__);
 819                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 820                                        mic->name, __func__, __LINE__,
 821                                        sum_iovec_len(copy));
 822#endif
 823                                /* Reinitialize IOV for next run */
 824                                iov0->iov_len = PAGE_SIZE;
 825                        } else if (len < 0) {
 826                                disp_iovec(mic, &copy, __func__, __LINE__);
 827                                mpsslog("%s %s %d read failed %s ",
 828                                        mic->name, __func__, __LINE__,
 829                                        strerror(errno));
 830                                mpsslog("cnt %d sum %zd\n",
 831                                        copy.iovcnt, sum_iovec_len(&copy));
 832                        }
 833                }
 834
 835                if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
 836                        while (rx_vr.info->avail_idx !=
 837                                le16toh(rx_vr.vr.avail->idx)) {
 838                                copy.iov = iov1;
 839                                txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
 840                                             &copy, PAGE_SIZE);
 841
 842                                err = mic_virtio_copy(mic,
 843                                        mic->mic_console.virtio_console_fd,
 844                                        &rx_vr, &copy);
 845                                if (!err) {
 846                                        /* Set the correct output iov_len */
 847                                        iov1->iov_len = copy.out_len;
 848                                        verify_out_len(mic, &copy);
 849#ifdef DEBUG
 850                                        disp_iovec(mic, copy, __func__,
 851                                                   __LINE__);
 852                                        mpsslog("%s %s %d ",
 853                                                mic->name, __func__, __LINE__);
 854                                        mpsslog("read from net 0x%lx\n",
 855                                                sum_iovec_len(copy));
 856#endif
 857                                        len = writev(pty_fd,
 858                                                copy.iov, copy.iovcnt);
 859                                        if (len != sum_iovec_len(&copy)) {
 860                                                mpsslog("Tun write failed %s ",
 861                                                        strerror(errno));
 862                                                mpsslog("len 0x%zx ", len);
 863                                                mpsslog("read_len 0x%zx\n",
 864                                                        sum_iovec_len(&copy));
 865                                        } else {
 866#ifdef DEBUG
 867                                                disp_iovec(mic, copy, __func__,
 868                                                           __LINE__);
 869                                                mpsslog("%s %s %d ",
 870                                                        mic->name, __func__,
 871                                                        __LINE__);
 872                                                mpsslog("wrote to tap 0x%lx\n",
 873                                                        len);
 874#endif
 875                                        }
 876                                } else {
 877                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 878                                                mic->name, __func__, __LINE__,
 879                                                strerror(errno));
 880                                        break;
 881                                }
 882                        }
 883                }
 884                if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 885                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 886        }
 887_close_pty:
 888        close(pty_fd);
 889_return:
 890        pthread_exit(NULL);
 891}
 892
 893static void
 894add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
 895{
 896        char path[PATH_MAX];
 897        int fd, err;
 898
 899        snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
 900        fd = open(path, O_RDWR);
 901        if (fd < 0) {
 902                mpsslog("Could not open %s %s\n", path, strerror(errno));
 903                return;
 904        }
 905
 906        err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
 907        if (err < 0) {
 908                mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
 909                close(fd);
 910                return;
 911        }
 912        switch (dd->type) {
 913        case VIRTIO_ID_NET:
 914                mic->mic_net.virtio_net_fd = fd;
 915                mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
 916                break;
 917        case VIRTIO_ID_CONSOLE:
 918                mic->mic_console.virtio_console_fd = fd;
 919                mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
 920                break;
 921        case VIRTIO_ID_BLOCK:
 922                mic->mic_virtblk.virtio_block_fd = fd;
 923                mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
 924                break;
 925        }
 926}
 927
 928static bool
 929set_backend_file(struct mic_info *mic)
 930{
 931        FILE *config;
 932        char buff[PATH_MAX], *line, *evv, *p;
 933
 934        snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
 935        config = fopen(buff, "r");
 936        if (config == NULL)
 937                return false;
 938        do {  /* look for "virtblk_backend=XXXX" */
 939                line = fgets(buff, PATH_MAX, config);
 940                if (line == NULL)
 941                        break;
 942                if (*line == '#')
 943                        continue;
 944                p = strchr(line, '\n');
 945                if (p)
 946                        *p = '\0';
 947        } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
 948        fclose(config);
 949        if (line == NULL)
 950                return false;
 951        evv = strchr(line, '=');
 952        if (evv == NULL)
 953                return false;
 954        mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
 955        if (mic->mic_virtblk.backend_file == NULL) {
 956                mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
 957                return false;
 958        }
 959        strcpy(mic->mic_virtblk.backend_file, evv + 1);
 960        return true;
 961}
 962
 963#define SECTOR_SIZE 512
 964static bool
 965set_backend_size(struct mic_info *mic)
 966{
 967        mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
 968                SEEK_END);
 969        if (mic->mic_virtblk.backend_size < 0) {
 970                mpsslog("%s: can't seek: %s\n",
 971                        mic->name, mic->mic_virtblk.backend_file);
 972                return false;
 973        }
 974        virtblk_dev_page.blk_config.capacity =
 975                mic->mic_virtblk.backend_size / SECTOR_SIZE;
 976        if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
 977                virtblk_dev_page.blk_config.capacity++;
 978
 979        virtblk_dev_page.blk_config.capacity =
 980                htole64(virtblk_dev_page.blk_config.capacity);
 981
 982        return true;
 983}
 984
 985static bool
 986open_backend(struct mic_info *mic)
 987{
 988        if (!set_backend_file(mic))
 989                goto _error_exit;
 990        mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
 991        if (mic->mic_virtblk.backend < 0) {
 992                mpsslog("%s: can't open: %s\n", mic->name,
 993                        mic->mic_virtblk.backend_file);
 994                goto _error_free;
 995        }
 996        if (!set_backend_size(mic))
 997                goto _error_close;
 998        mic->mic_virtblk.backend_addr = mmap(NULL,
 999                mic->mic_virtblk.backend_size,
1000                PROT_READ|PROT_WRITE, MAP_SHARED,
1001                mic->mic_virtblk.backend, 0L);
1002        if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1003                mpsslog("%s: can't map: %s %s\n",
1004                        mic->name, mic->mic_virtblk.backend_file,
1005                        strerror(errno));
1006                goto _error_close;
1007        }
1008        return true;
1009
1010 _error_close:
1011        close(mic->mic_virtblk.backend);
1012 _error_free:
1013        free(mic->mic_virtblk.backend_file);
1014 _error_exit:
1015        return false;
1016}
1017
1018static void
1019close_backend(struct mic_info *mic)
1020{
1021        munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1022        close(mic->mic_virtblk.backend);
1023        free(mic->mic_virtblk.backend_file);
1024}
1025
1026static bool
1027start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1028{
1029        if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1030                mpsslog("%s: blk_config is not 8 byte aligned.\n",
1031                        mic->name);
1032                return false;
1033        }
1034        add_virtio_device(mic, &virtblk_dev_page.dd);
1035        if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1036                                  VIRTIO_ID_BLOCK, vring, NULL,
1037                                  virtblk_dev_page.dd.num_vq)) {
1038                mpsslog("%s init_vr failed %s\n",
1039                        mic->name, strerror(errno));
1040                return false;
1041        }
1042        return true;
1043}
1044
1045static void
1046stop_virtblk(struct mic_info *mic)
1047{
1048        int vr_size, ret;
1049
1050        vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
1051                MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
1052        ret = munmap(mic->mic_virtblk.block_dp,
1053                MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1054        if (ret < 0)
1055                mpsslog("%s munmap errno %d\n", mic->name, errno);
1056        close(mic->mic_virtblk.virtio_block_fd);
1057}
1058
1059static __u8
1060header_error_check(struct vring_desc *desc)
1061{
1062        if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1063                mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1064                        __func__, __LINE__);
1065                return -EIO;
1066        }
1067        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1068                mpsslog("%s() %d: alone\n",
1069                        __func__, __LINE__);
1070                return -EIO;
1071        }
1072        if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1073                mpsslog("%s() %d: not read\n",
1074                        __func__, __LINE__);
1075                return -EIO;
1076        }
1077        return 0;
1078}
1079
1080static int
1081read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1082{
1083        struct iovec iovec;
1084        struct mic_copy_desc copy;
1085
1086        iovec.iov_len = sizeof(*hdr);
1087        iovec.iov_base = hdr;
1088        copy.iov = &iovec;
1089        copy.iovcnt = 1;
1090        copy.vr_idx = 0;  /* only one vring on virtio_block */
1091        copy.update_used = false;  /* do not update used index */
1092        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1093}
1094
1095static int
1096transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1097{
1098        struct mic_copy_desc copy;
1099
1100        copy.iov = iovec;
1101        copy.iovcnt = iovcnt;
1102        copy.vr_idx = 0;  /* only one vring on virtio_block */
1103        copy.update_used = false;  /* do not update used index */
1104        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1105}
1106
1107static __u8
1108status_error_check(struct vring_desc *desc)
1109{
1110        if (le32toh(desc->len) != sizeof(__u8)) {
1111                mpsslog("%s() %d: length is not sizeof(status)\n",
1112                        __func__, __LINE__);
1113                return -EIO;
1114        }
1115        return 0;
1116}
1117
1118static int
1119write_status(int fd, __u8 *status)
1120{
1121        struct iovec iovec;
1122        struct mic_copy_desc copy;
1123
1124        iovec.iov_base = status;
1125        iovec.iov_len = sizeof(*status);
1126        copy.iov = &iovec;
1127        copy.iovcnt = 1;
1128        copy.vr_idx = 0;  /* only one vring on virtio_block */
1129        copy.update_used = true; /* Update used index */
1130        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1131}
1132
1133static void *
1134virtio_block(void *arg)
1135{
1136        struct mic_info *mic = (struct mic_info *)arg;
1137        int ret;
1138        struct pollfd block_poll;
1139        struct mic_vring vring;
1140        __u16 avail_idx;
1141        __u32 desc_idx;
1142        struct vring_desc *desc;
1143        struct iovec *iovec, *piov;
1144        __u8 status;
1145        __u32 buffer_desc_idx;
1146        struct virtio_blk_outhdr hdr;
1147        void *fos;
1148
1149        for (;;) {  /* forever */
1150                if (!open_backend(mic)) { /* No virtblk */
1151                        for (mic->mic_virtblk.signaled = 0;
1152                                !mic->mic_virtblk.signaled;)
1153                                sleep(1);
1154                        continue;
1155                }
1156
1157                /* backend file is specified. */
1158                if (!start_virtblk(mic, &vring))
1159                        goto _close_backend;
1160                iovec = malloc(sizeof(*iovec) *
1161                        le32toh(virtblk_dev_page.blk_config.seg_max));
1162                if (!iovec) {
1163                        mpsslog("%s: can't alloc iovec: %s\n",
1164                                mic->name, strerror(ENOMEM));
1165                        goto _stop_virtblk;
1166                }
1167
1168                block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1169                block_poll.events = POLLIN;
1170                for (mic->mic_virtblk.signaled = 0;
1171                     !mic->mic_virtblk.signaled;) {
1172                        block_poll.revents = 0;
1173                                        /* timeout in 1 sec to see signaled */
1174                        ret = poll(&block_poll, 1, 1000);
1175                        if (ret < 0) {
1176                                mpsslog("%s %d: poll failed: %s\n",
1177                                        __func__, __LINE__,
1178                                        strerror(errno));
1179                                continue;
1180                        }
1181
1182                        if (!(block_poll.revents & POLLIN)) {
1183#ifdef DEBUG
1184                                mpsslog("%s %d: block_poll.revents=0x%x\n",
1185                                        __func__, __LINE__, block_poll.revents);
1186#endif
1187                                continue;
1188                        }
1189
1190                        /* POLLIN */
1191                        while (vring.info->avail_idx !=
1192                                le16toh(vring.vr.avail->idx)) {
1193                                /* read header element */
1194                                avail_idx =
1195                                        vring.info->avail_idx &
1196                                        (vring.vr.num - 1);
1197                                desc_idx = le16toh(
1198                                        vring.vr.avail->ring[avail_idx]);
1199                                desc = &vring.vr.desc[desc_idx];
1200#ifdef DEBUG
1201                                mpsslog("%s() %d: avail_idx=%d ",
1202                                        __func__, __LINE__,
1203                                        vring.info->avail_idx);
1204                                mpsslog("vring.vr.num=%d desc=%p\n",
1205                                        vring.vr.num, desc);
1206#endif
1207                                status = header_error_check(desc);
1208                                ret = read_header(
1209                                        mic->mic_virtblk.virtio_block_fd,
1210                                        &hdr, desc_idx);
1211                                if (ret < 0) {
1212                                        mpsslog("%s() %d %s: ret=%d %s\n",
1213                                                __func__, __LINE__,
1214                                                mic->name, ret,
1215                                                strerror(errno));
1216                                        break;
1217                                }
1218                                /* buffer element */
1219                                piov = iovec;
1220                                status = 0;
1221                                fos = mic->mic_virtblk.backend_addr +
1222                                        (hdr.sector * SECTOR_SIZE);
1223                                buffer_desc_idx = next_desc(desc);
1224                                desc_idx = buffer_desc_idx;
1225                                for (desc = &vring.vr.desc[buffer_desc_idx];
1226                                     desc->flags & VRING_DESC_F_NEXT;
1227                                     desc_idx = next_desc(desc),
1228                                             desc = &vring.vr.desc[desc_idx]) {
1229                                        piov->iov_len = desc->len;
1230                                        piov->iov_base = fos;
1231                                        piov++;
1232                                        fos += desc->len;
1233                                }
1234                                /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1235                                if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1236                                        VIRTIO_BLK_T_GET_ID)) {
1237                                        /*
1238                                          VIRTIO_BLK_T_IN - does not do
1239                                          anything. Probably for documenting.
1240                                          VIRTIO_BLK_T_SCSI_CMD - for
1241                                          virtio_scsi.
1242                                          VIRTIO_BLK_T_FLUSH - turned off in
1243                                          config space.
1244                                          VIRTIO_BLK_T_BARRIER - defined but not
1245                                          used in anywhere.
1246                                        */
1247                                        mpsslog("%s() %d: type %x ",
1248                                                __func__, __LINE__,
1249                                                hdr.type);
1250                                        mpsslog("is not supported\n");
1251                                        status = -ENOTSUP;
1252
1253                                } else {
1254                                        ret = transfer_blocks(
1255                                        mic->mic_virtblk.virtio_block_fd,
1256                                                iovec,
1257                                                piov - iovec);
1258                                        if (ret < 0 &&
1259                                            status != 0)
1260                                                status = ret;
1261                                }
1262                                /* write status and update used pointer */
1263                                if (status != 0)
1264                                        status = status_error_check(desc);
1265                                ret = write_status(
1266                                        mic->mic_virtblk.virtio_block_fd,
1267                                        &status);
1268#ifdef DEBUG
1269                                mpsslog("%s() %d: write status=%d on desc=%p\n",
1270                                        __func__, __LINE__,
1271                                        status, desc);
1272#endif
1273                        }
1274                }
1275                free(iovec);
1276_stop_virtblk:
1277                stop_virtblk(mic);
1278_close_backend:
1279                close_backend(mic);
1280        }  /* forever */
1281
1282        pthread_exit(NULL);
1283}
1284
1285static void
1286reset(struct mic_info *mic)
1287{
1288#define RESET_TIMEOUT 120
1289        int i = RESET_TIMEOUT;
1290        setsysfs(mic->name, "state", "reset");
1291        while (i) {
1292                char *state;
1293                state = readsysfs(mic->name, "state");
1294                if (!state)
1295                        goto retry;
1296                mpsslog("%s: %s %d state %s\n",
1297                        mic->name, __func__, __LINE__, state);
1298
1299                /*
1300                 * If the shutdown was initiated by OSPM, the state stays
1301                 * in "suspended" which is also a valid condition for reset.
1302                 */
1303                if ((!strcmp(state, "offline")) ||
1304                    (!strcmp(state, "suspended"))) {
1305                        free(state);
1306                        break;
1307                }
1308                free(state);
1309retry:
1310                sleep(1);
1311                i--;
1312        }
1313}
1314
1315static int
1316get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1317{
1318        if (!strcmp(shutdown_status, "nop"))
1319                return MIC_NOP;
1320        if (!strcmp(shutdown_status, "crashed"))
1321                return MIC_CRASHED;
1322        if (!strcmp(shutdown_status, "halted"))
1323                return MIC_HALTED;
1324        if (!strcmp(shutdown_status, "poweroff"))
1325                return MIC_POWER_OFF;
1326        if (!strcmp(shutdown_status, "restart"))
1327                return MIC_RESTART;
1328        mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1329        /* Invalid state */
1330        assert(0);
1331};
1332
1333static int get_mic_state(struct mic_info *mic, char *state)
1334{
1335        if (!strcmp(state, "offline"))
1336                return MIC_OFFLINE;
1337        if (!strcmp(state, "online"))
1338                return MIC_ONLINE;
1339        if (!strcmp(state, "shutting_down"))
1340                return MIC_SHUTTING_DOWN;
1341        if (!strcmp(state, "reset_failed"))
1342                return MIC_RESET_FAILED;
1343        if (!strcmp(state, "suspending"))
1344                return MIC_SUSPENDING;
1345        if (!strcmp(state, "suspended"))
1346                return MIC_SUSPENDED;
1347        mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1348        /* Invalid state */
1349        assert(0);
1350};
1351
1352static void mic_handle_shutdown(struct mic_info *mic)
1353{
1354#define SHUTDOWN_TIMEOUT 60
1355        int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
1356        char *shutdown_status;
1357        while (i) {
1358                shutdown_status = readsysfs(mic->name, "shutdown_status");
1359                if (!shutdown_status)
1360                        continue;
1361                mpsslog("%s: %s %d shutdown_status %s\n",
1362                        mic->name, __func__, __LINE__, shutdown_status);
1363                switch (get_mic_shutdown_status(mic, shutdown_status)) {
1364                case MIC_RESTART:
1365                        mic->restart = 1;
1366                case MIC_HALTED:
1367                case MIC_POWER_OFF:
1368                case MIC_CRASHED:
1369                        free(shutdown_status);
1370                        goto reset;
1371                default:
1372                        break;
1373                }
1374                free(shutdown_status);
1375                sleep(1);
1376                i--;
1377        }
1378reset:
1379        ret = kill(mic->pid, SIGTERM);
1380        mpsslog("%s: %s %d kill pid %d ret %d\n",
1381                mic->name, __func__, __LINE__,
1382                mic->pid, ret);
1383        if (!ret) {
1384                ret = waitpid(mic->pid, &stat,
1385                        WIFSIGNALED(stat));
1386                mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1387                        mic->name, __func__, __LINE__,
1388                        ret, mic->pid);
1389        }
1390        if (ret == mic->pid)
1391                reset(mic);
1392}
1393
1394static void *
1395mic_config(void *arg)
1396{
1397        struct mic_info *mic = (struct mic_info *)arg;
1398        char *state = NULL;
1399        char pathname[PATH_MAX];
1400        int fd, ret;
1401        struct pollfd ufds[1];
1402        char value[4096];
1403
1404        snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1405                 MICSYSFSDIR, mic->name, "state");
1406
1407        fd = open(pathname, O_RDONLY);
1408        if (fd < 0) {
1409                mpsslog("%s: opening file %s failed %s\n",
1410                        mic->name, pathname, strerror(errno));
1411                goto error;
1412        }
1413
1414        do {
1415                ret = lseek(fd, 0, SEEK_SET);
1416                if (ret < 0) {
1417                        mpsslog("%s: Failed to seek to file start '%s': %s\n",
1418                                mic->name, pathname, strerror(errno));
1419                        goto close_error1;
1420                }
1421                ret = read(fd, value, sizeof(value));
1422                if (ret < 0) {
1423                        mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
1424                                mic->name, pathname, strerror(errno));
1425                        goto close_error1;
1426                }
1427retry:
1428                state = readsysfs(mic->name, "state");
1429                if (!state)
1430                        goto retry;
1431                mpsslog("%s: %s %d state %s\n",
1432                        mic->name, __func__, __LINE__, state);
1433                switch (get_mic_state(mic, state)) {
1434                case MIC_SHUTTING_DOWN:
1435                        mic_handle_shutdown(mic);
1436                        goto close_error;
1437                case MIC_SUSPENDING:
1438                        mic->boot_on_resume = 1;
1439                        setsysfs(mic->name, "state", "suspend");
1440                        mic_handle_shutdown(mic);
1441                        goto close_error;
1442                case MIC_OFFLINE:
1443                        if (mic->boot_on_resume) {
1444                                setsysfs(mic->name, "state", "boot");
1445                                mic->boot_on_resume = 0;
1446                        }
1447                        break;
1448                default:
1449                        break;
1450                }
1451                free(state);
1452
1453                ufds[0].fd = fd;
1454                ufds[0].events = POLLERR | POLLPRI;
1455                ret = poll(ufds, 1, -1);
1456                if (ret < 0) {
1457                        mpsslog("%s: poll failed %s\n",
1458                                mic->name, strerror(errno));
1459                        goto close_error1;
1460                }
1461        } while (1);
1462close_error:
1463        free(state);
1464close_error1:
1465        close(fd);
1466error:
1467        init_mic(mic);
1468        pthread_exit(NULL);
1469}
1470
1471static void
1472set_cmdline(struct mic_info *mic)
1473{
1474        char buffer[PATH_MAX];
1475        int len;
1476
1477        len = snprintf(buffer, PATH_MAX,
1478                "clocksource=tsc highres=off nohz=off ");
1479        len += snprintf(buffer + len, PATH_MAX,
1480                "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1481        len += snprintf(buffer + len, PATH_MAX,
1482                "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1483                mic->id);
1484
1485        setsysfs(mic->name, "cmdline", buffer);
1486        mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1487        snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
1488        mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1489}
1490
1491static void
1492set_log_buf_info(struct mic_info *mic)
1493{
1494        int fd;
1495        off_t len;
1496        char system_map[] = "/lib/firmware/mic/System.map";
1497        char *map, *temp, log_buf[17] = {'\0'};
1498
1499        fd = open(system_map, O_RDONLY);
1500        if (fd < 0) {
1501                mpsslog("%s: Opening System.map failed: %d\n",
1502                        mic->name, errno);
1503                return;
1504        }
1505        len = lseek(fd, 0, SEEK_END);
1506        if (len < 0) {
1507                mpsslog("%s: Reading System.map size failed: %d\n",
1508                        mic->name, errno);
1509                close(fd);
1510                return;
1511        }
1512        map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1513        if (map == MAP_FAILED) {
1514                mpsslog("%s: mmap of System.map failed: %d\n",
1515                        mic->name, errno);
1516                close(fd);
1517                return;
1518        }
1519        temp = strstr(map, "__log_buf");
1520        if (!temp) {
1521                mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1522                munmap(map, len);
1523                close(fd);
1524                return;
1525        }
1526        strncpy(log_buf, temp - 19, 16);
1527        setsysfs(mic->name, "log_buf_addr", log_buf);
1528        mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1529        temp = strstr(map, "log_buf_len");
1530        if (!temp) {
1531                mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1532                munmap(map, len);
1533                close(fd);
1534                return;
1535        }
1536        strncpy(log_buf, temp - 19, 16);
1537        setsysfs(mic->name, "log_buf_len", log_buf);
1538        mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1539        munmap(map, len);
1540        close(fd);
1541}
1542
1543static void init_mic(struct mic_info *mic);
1544
1545static void
1546change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1547{
1548        struct mic_info *mic;
1549
1550        for (mic = mic_list.next; mic != NULL; mic = mic->next)
1551                mic->mic_virtblk.signaled = 1/* true */;
1552}
1553
1554static void
1555init_mic(struct mic_info *mic)
1556{
1557        struct sigaction ignore = {
1558                .sa_flags = 0,
1559                .sa_handler = SIG_IGN
1560        };
1561        struct sigaction act = {
1562                .sa_flags = SA_SIGINFO,
1563                .sa_sigaction = change_virtblk_backend,
1564        };
1565        char buffer[PATH_MAX];
1566        int err;
1567
1568        /*
1569         * Currently, one virtio block device is supported for each MIC card
1570         * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1571         * The signal informs the virtio block backend about a change in the
1572         * configuration file which specifies the virtio backend file name on
1573         * the host. Virtio block backend then re-reads the configuration file
1574         * and switches to the new block device. This signalling mechanism may
1575         * not be required once multiple virtio block devices are supported by
1576         * the MIC daemon.
1577         */
1578        sigaction(SIGUSR1, &ignore, NULL);
1579
1580        mic->pid = fork();
1581        switch (mic->pid) {
1582        case 0:
1583                set_log_buf_info(mic);
1584                set_cmdline(mic);
1585                add_virtio_device(mic, &virtcons_dev_page.dd);
1586                add_virtio_device(mic, &virtnet_dev_page.dd);
1587                err = pthread_create(&mic->mic_console.console_thread, NULL,
1588                        virtio_console, mic);
1589                if (err)
1590                        mpsslog("%s virtcons pthread_create failed %s\n",
1591                                mic->name, strerror(err));
1592                err = pthread_create(&mic->mic_net.net_thread, NULL,
1593                        virtio_net, mic);
1594                if (err)
1595                        mpsslog("%s virtnet pthread_create failed %s\n",
1596                                mic->name, strerror(err));
1597                err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1598                        virtio_block, mic);
1599                if (err)
1600                        mpsslog("%s virtblk pthread_create failed %s\n",
1601                                mic->name, strerror(err));
1602                sigemptyset(&act.sa_mask);
1603                err = sigaction(SIGUSR1, &act, NULL);
1604                if (err)
1605                        mpsslog("%s sigaction SIGUSR1 failed %s\n",
1606                                mic->name, strerror(errno));
1607                while (1)
1608                        sleep(60);
1609        case -1:
1610                mpsslog("fork failed MIC name %s id %d errno %d\n",
1611                        mic->name, mic->id, errno);
1612                break;
1613        default:
1614                if (mic->restart) {
1615                        snprintf(buffer, PATH_MAX, "boot");
1616                        setsysfs(mic->name, "state", buffer);
1617                        mpsslog("%s restarting mic %d\n",
1618                                mic->name, mic->restart);
1619                        mic->restart = 0;
1620                }
1621                pthread_create(&mic->config_thread, NULL, mic_config, mic);
1622        }
1623}
1624
1625static void
1626start_daemon(void)
1627{
1628        struct mic_info *mic;
1629
1630        for (mic = mic_list.next; mic != NULL; mic = mic->next)
1631                init_mic(mic);
1632
1633        while (1)
1634                sleep(60);
1635}
1636
1637static int
1638init_mic_list(void)
1639{
1640        struct mic_info *mic = &mic_list;
1641        struct dirent *file;
1642        DIR *dp;
1643        int cnt = 0;
1644
1645        dp = opendir(MICSYSFSDIR);
1646        if (!dp)
1647                return 0;
1648
1649        while ((file = readdir(dp)) != NULL) {
1650                if (!strncmp(file->d_name, "mic", 3)) {
1651                        mic->next = calloc(1, sizeof(struct mic_info));
1652                        if (mic->next) {
1653                                mic = mic->next;
1654                                mic->id = atoi(&file->d_name[3]);
1655                                mic->name = malloc(strlen(file->d_name) + 16);
1656                                if (mic->name)
1657                                        strcpy(mic->name, file->d_name);
1658                                mpsslog("MIC name %s id %d\n", mic->name,
1659                                        mic->id);
1660                                cnt++;
1661                        }
1662                }
1663        }
1664
1665        closedir(dp);
1666        return cnt;
1667}
1668
1669void
1670mpsslog(char *format, ...)
1671{
1672        va_list args;
1673        char buffer[4096];
1674        char ts[52], *ts1;
1675        time_t t;
1676
1677        if (logfp == NULL)
1678                return;
1679
1680        va_start(args, format);
1681        vsprintf(buffer, format, args);
1682        va_end(args);
1683
1684        time(&t);
1685        ts1 = ctime_r(&t, ts);
1686        ts1[strlen(ts1) - 1] = '\0';
1687        fprintf(logfp, "%s: %s", ts1, buffer);
1688
1689        fflush(logfp);
1690}
1691
1692int
1693main(int argc, char *argv[])
1694{
1695        int cnt;
1696        pid_t pid;
1697
1698        myname = argv[0];
1699
1700        logfp = fopen(LOGFILE_NAME, "a+");
1701        if (!logfp) {
1702                fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1703                exit(1);
1704        }
1705        pid = fork();
1706        switch (pid) {
1707        case 0:
1708                break;
1709        case -1:
1710                exit(2);
1711        default:
1712                exit(0);
1713        }
1714
1715        mpsslog("MIC Daemon start\n");
1716
1717        cnt = init_mic_list();
1718        if (cnt == 0) {
1719                mpsslog("MIC module not loaded\n");
1720                exit(3);
1721        }
1722        mpsslog("MIC found %d devices\n", cnt);
1723
1724        start_daemon();
1725
1726        exit(0);
1727}
1728