linux/Documentation/mic/mpssd/mpssd.c
<<
>>
Prefs
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2013 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * The full GNU General Public License is included in this distribution in
  16 * the file called "COPYING".
  17 *
  18 * Intel MIC User Space Tools.
  19 */
  20
  21#define _GNU_SOURCE
  22
  23#include <stdlib.h>
  24#include <fcntl.h>
  25#include <getopt.h>
  26#include <assert.h>
  27#include <unistd.h>
  28#include <stdbool.h>
  29#include <signal.h>
  30#include <poll.h>
  31#include <features.h>
  32#include <sys/types.h>
  33#include <sys/stat.h>
  34#include <sys/mman.h>
  35#include <sys/socket.h>
  36#include <linux/virtio_ring.h>
  37#include <linux/virtio_net.h>
  38#include <linux/virtio_console.h>
  39#include <linux/virtio_blk.h>
  40#include <linux/version.h>
  41#include "mpssd.h"
  42#include <linux/mic_ioctl.h>
  43#include <linux/mic_common.h>
  44#include <tools/endian.h>
  45
  46static void *init_mic(void *arg);
  47
  48static FILE *logfp;
  49static struct mic_info mic_list;
  50
  51#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  52
  53#define min_t(type, x, y) ({                            \
  54                type __min1 = (x);                      \
  55                type __min2 = (y);                      \
  56                __min1 < __min2 ? __min1 : __min2; })
  57
  58/* align addr on a size boundary - adjust address up/down if needed */
  59#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
  60#define _ALIGN_UP(addr, size)    _ALIGN_DOWN(addr + size - 1, size)
  61
  62/* align addr on a size boundary - adjust address up if needed */
  63#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
  64
  65/* to align the pointer to the (next) page boundary */
  66#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
  67
  68#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  69
  70#define GSO_ENABLED             1
  71#define MAX_GSO_SIZE            (64 * 1024)
  72#define ETH_H_LEN               14
  73#define MAX_NET_PKT_SIZE        (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
  74#define MIC_DEVICE_PAGE_END     0x1000
  75
  76#ifndef VIRTIO_NET_HDR_F_DATA_VALID
  77#define VIRTIO_NET_HDR_F_DATA_VALID     2       /* Csum is valid */
  78#endif
  79
  80static struct {
  81        struct mic_device_desc dd;
  82        struct mic_vqconfig vqconfig[2];
  83        __u32 host_features, guest_acknowledgements;
  84        struct virtio_console_config cons_config;
  85} virtcons_dev_page = {
  86        .dd = {
  87                .type = VIRTIO_ID_CONSOLE,
  88                .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
  89                .feature_len = sizeof(virtcons_dev_page.host_features),
  90                .config_len = sizeof(virtcons_dev_page.cons_config),
  91        },
  92        .vqconfig[0] = {
  93                .num = htole16(MIC_VRING_ENTRIES),
  94        },
  95        .vqconfig[1] = {
  96                .num = htole16(MIC_VRING_ENTRIES),
  97        },
  98};
  99
 100static struct {
 101        struct mic_device_desc dd;
 102        struct mic_vqconfig vqconfig[2];
 103        __u32 host_features, guest_acknowledgements;
 104        struct virtio_net_config net_config;
 105} virtnet_dev_page = {
 106        .dd = {
 107                .type = VIRTIO_ID_NET,
 108                .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
 109                .feature_len = sizeof(virtnet_dev_page.host_features),
 110                .config_len = sizeof(virtnet_dev_page.net_config),
 111        },
 112        .vqconfig[0] = {
 113                .num = htole16(MIC_VRING_ENTRIES),
 114        },
 115        .vqconfig[1] = {
 116                .num = htole16(MIC_VRING_ENTRIES),
 117        },
 118#if GSO_ENABLED
 119        .host_features = htole32(
 120                1 << VIRTIO_NET_F_CSUM |
 121                1 << VIRTIO_NET_F_GSO |
 122                1 << VIRTIO_NET_F_GUEST_TSO4 |
 123                1 << VIRTIO_NET_F_GUEST_TSO6 |
 124                1 << VIRTIO_NET_F_GUEST_ECN),
 125#else
 126                .host_features = 0,
 127#endif
 128};
 129
 130static const char *mic_config_dir = "/etc/mpss";
 131static const char *virtblk_backend = "VIRTBLK_BACKEND";
 132static struct {
 133        struct mic_device_desc dd;
 134        struct mic_vqconfig vqconfig[1];
 135        __u32 host_features, guest_acknowledgements;
 136        struct virtio_blk_config blk_config;
 137} virtblk_dev_page = {
 138        .dd = {
 139                .type = VIRTIO_ID_BLOCK,
 140                .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
 141                .feature_len = sizeof(virtblk_dev_page.host_features),
 142                .config_len = sizeof(virtblk_dev_page.blk_config),
 143        },
 144        .vqconfig[0] = {
 145                .num = htole16(MIC_VRING_ENTRIES),
 146        },
 147        .host_features =
 148                htole32(1<<VIRTIO_BLK_F_SEG_MAX),
 149        .blk_config = {
 150                .seg_max = htole32(MIC_VRING_ENTRIES - 2),
 151                .capacity = htole64(0),
 152         }
 153};
 154
 155static char *myname;
 156
 157static int
 158tap_configure(struct mic_info *mic, char *dev)
 159{
 160        pid_t pid;
 161        char *ifargv[7];
 162        char ipaddr[IFNAMSIZ];
 163        int ret = 0;
 164
 165        pid = fork();
 166        if (pid == 0) {
 167                ifargv[0] = "ip";
 168                ifargv[1] = "link";
 169                ifargv[2] = "set";
 170                ifargv[3] = dev;
 171                ifargv[4] = "up";
 172                ifargv[5] = NULL;
 173                mpsslog("Configuring %s\n", dev);
 174                ret = execvp("ip", ifargv);
 175                if (ret < 0) {
 176                        mpsslog("%s execvp failed errno %s\n",
 177                                mic->name, strerror(errno));
 178                        return ret;
 179                }
 180        }
 181        if (pid < 0) {
 182                mpsslog("%s fork failed errno %s\n",
 183                        mic->name, strerror(errno));
 184                return ret;
 185        }
 186
 187        ret = waitpid(pid, NULL, 0);
 188        if (ret < 0) {
 189                mpsslog("%s waitpid failed errno %s\n",
 190                        mic->name, strerror(errno));
 191                return ret;
 192        }
 193
 194        snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
 195
 196        pid = fork();
 197        if (pid == 0) {
 198                ifargv[0] = "ip";
 199                ifargv[1] = "addr";
 200                ifargv[2] = "add";
 201                ifargv[3] = ipaddr;
 202                ifargv[4] = "dev";
 203                ifargv[5] = dev;
 204                ifargv[6] = NULL;
 205                mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
 206                ret = execvp("ip", ifargv);
 207                if (ret < 0) {
 208                        mpsslog("%s execvp failed errno %s\n",
 209                                mic->name, strerror(errno));
 210                        return ret;
 211                }
 212        }
 213        if (pid < 0) {
 214                mpsslog("%s fork failed errno %s\n",
 215                        mic->name, strerror(errno));
 216                return ret;
 217        }
 218
 219        ret = waitpid(pid, NULL, 0);
 220        if (ret < 0) {
 221                mpsslog("%s waitpid failed errno %s\n",
 222                        mic->name, strerror(errno));
 223                return ret;
 224        }
 225        mpsslog("MIC name %s %s %d DONE!\n",
 226                mic->name, __func__, __LINE__);
 227        return 0;
 228}
 229
 230static int tun_alloc(struct mic_info *mic, char *dev)
 231{
 232        struct ifreq ifr;
 233        int fd, err;
 234#if GSO_ENABLED
 235        unsigned offload;
 236#endif
 237        fd = open("/dev/net/tun", O_RDWR);
 238        if (fd < 0) {
 239                mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
 240                goto done;
 241        }
 242
 243        memset(&ifr, 0, sizeof(ifr));
 244
 245        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 246        if (*dev)
 247                strncpy(ifr.ifr_name, dev, IFNAMSIZ);
 248
 249        err = ioctl(fd, TUNSETIFF, (void *)&ifr);
 250        if (err < 0) {
 251                mpsslog("%s %s %d TUNSETIFF failed %s\n",
 252                        mic->name, __func__, __LINE__, strerror(errno));
 253                close(fd);
 254                return err;
 255        }
 256#if GSO_ENABLED
 257        offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
 258
 259        err = ioctl(fd, TUNSETOFFLOAD, offload);
 260        if (err < 0) {
 261                mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
 262                        mic->name, __func__, __LINE__, strerror(errno));
 263                close(fd);
 264                return err;
 265        }
 266#endif
 267        strcpy(dev, ifr.ifr_name);
 268        mpsslog("Created TAP %s\n", dev);
 269done:
 270        return fd;
 271}
 272
 273#define NET_FD_VIRTIO_NET 0
 274#define NET_FD_TUN 1
 275#define MAX_NET_FD 2
 276
 277static void set_dp(struct mic_info *mic, int type, void *dp)
 278{
 279        switch (type) {
 280        case VIRTIO_ID_CONSOLE:
 281                mic->mic_console.console_dp = dp;
 282                return;
 283        case VIRTIO_ID_NET:
 284                mic->mic_net.net_dp = dp;
 285                return;
 286        case VIRTIO_ID_BLOCK:
 287                mic->mic_virtblk.block_dp = dp;
 288                return;
 289        }
 290        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 291        assert(0);
 292}
 293
 294static void *get_dp(struct mic_info *mic, int type)
 295{
 296        switch (type) {
 297        case VIRTIO_ID_CONSOLE:
 298                return mic->mic_console.console_dp;
 299        case VIRTIO_ID_NET:
 300                return mic->mic_net.net_dp;
 301        case VIRTIO_ID_BLOCK:
 302                return mic->mic_virtblk.block_dp;
 303        }
 304        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 305        assert(0);
 306        return NULL;
 307}
 308
 309static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
 310{
 311        struct mic_device_desc *d;
 312        int i;
 313        void *dp = get_dp(mic, type);
 314
 315        for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
 316                i += mic_total_desc_size(d)) {
 317                d = dp + i;
 318
 319                /* End of list */
 320                if (d->type == 0)
 321                        break;
 322
 323                if (d->type == -1)
 324                        continue;
 325
 326                mpsslog("%s %s d-> type %d d %p\n",
 327                        mic->name, __func__, d->type, d);
 328
 329                if (d->type == (__u8)type)
 330                        return d;
 331        }
 332        mpsslog("%s %s %d not found\n", mic->name, __func__, type);
 333        return NULL;
 334}
 335
 336/* See comments in vhost.c for explanation of next_desc() */
 337static unsigned next_desc(struct vring_desc *desc)
 338{
 339        unsigned int next;
 340
 341        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
 342                return -1U;
 343        next = le16toh(desc->next);
 344        return next;
 345}
 346
 347/* Sum up all the IOVEC length */
 348static ssize_t
 349sum_iovec_len(struct mic_copy_desc *copy)
 350{
 351        ssize_t sum = 0;
 352        unsigned int i;
 353
 354        for (i = 0; i < copy->iovcnt; i++)
 355                sum += copy->iov[i].iov_len;
 356        return sum;
 357}
 358
 359static inline void verify_out_len(struct mic_info *mic,
 360        struct mic_copy_desc *copy)
 361{
 362        if (copy->out_len != sum_iovec_len(copy)) {
 363                mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
 364                        mic->name, __func__, __LINE__,
 365                        copy->out_len, sum_iovec_len(copy));
 366                assert(copy->out_len == sum_iovec_len(copy));
 367        }
 368}
 369
 370/* Display an iovec */
 371static void
 372disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
 373           const char *s, int line)
 374{
 375        unsigned int i;
 376
 377        for (i = 0; i < copy->iovcnt; i++)
 378                mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
 379                        mic->name, s, line, i,
 380                        copy->iov[i].iov_base, copy->iov[i].iov_len);
 381}
 382
 383static inline __u16 read_avail_idx(struct mic_vring *vr)
 384{
 385        return ACCESS_ONCE(vr->info->avail_idx);
 386}
 387
 388static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
 389                                struct mic_copy_desc *copy, ssize_t len)
 390{
 391        copy->vr_idx = tx ? 0 : 1;
 392        copy->update_used = true;
 393        if (type == VIRTIO_ID_NET)
 394                copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
 395        else
 396                copy->iov[0].iov_len = len;
 397}
 398
 399/* Central API which triggers the copies */
 400static int
 401mic_virtio_copy(struct mic_info *mic, int fd,
 402                struct mic_vring *vr, struct mic_copy_desc *copy)
 403{
 404        int ret;
 405
 406        ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
 407        if (ret) {
 408                mpsslog("%s %s %d errno %s ret %d\n",
 409                        mic->name, __func__, __LINE__,
 410                        strerror(errno), ret);
 411        }
 412        return ret;
 413}
 414
 415static inline unsigned _vring_size(unsigned int num, unsigned long align)
 416{
 417        return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
 418                                + align - 1) & ~(align - 1))
 419                + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
 420}
 421
 422/*
 423 * This initialization routine requires at least one
 424 * vring i.e. vr0. vr1 is optional.
 425 */
 426static void *
 427init_vr(struct mic_info *mic, int fd, int type,
 428        struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
 429{
 430        int vr_size;
 431        char *va;
 432
 433        vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
 434                                         MIC_VIRTIO_RING_ALIGN) +
 435                             sizeof(struct _mic_vring_info));
 436        va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
 437                PROT_READ, MAP_SHARED, fd, 0);
 438        if (MAP_FAILED == va) {
 439                mpsslog("%s %s %d mmap failed errno %s\n",
 440                        mic->name, __func__, __LINE__,
 441                        strerror(errno));
 442                goto done;
 443        }
 444        set_dp(mic, type, va);
 445        vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
 446        vr0->info = vr0->va +
 447                _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
 448        vring_init(&vr0->vr,
 449                   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
 450        mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
 451                __func__, mic->name, vr0->va, vr0->info, vr_size,
 452                _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 453        mpsslog("magic 0x%x expected 0x%x\n",
 454                le32toh(vr0->info->magic), MIC_MAGIC + type);
 455        assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
 456        if (vr1) {
 457                vr1->va = (struct mic_vring *)
 458                        &va[MIC_DEVICE_PAGE_END + vr_size];
 459                vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
 460                        MIC_VIRTIO_RING_ALIGN);
 461                vring_init(&vr1->vr,
 462                           MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
 463                mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
 464                        __func__, mic->name, vr1->va, vr1->info, vr_size,
 465                        _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
 466                mpsslog("magic 0x%x expected 0x%x\n",
 467                        le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
 468                assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
 469        }
 470done:
 471        return va;
 472}
 473
 474static int
 475wait_for_card_driver(struct mic_info *mic, int fd, int type)
 476{
 477        struct pollfd pollfd;
 478        int err;
 479        struct mic_device_desc *desc = get_device_desc(mic, type);
 480        __u8 prev_status;
 481
 482        if (!desc)
 483                return -ENODEV;
 484        prev_status = desc->status;
 485        pollfd.fd = fd;
 486        mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
 487                mic->name, __func__, type, desc->status);
 488
 489        while (1) {
 490                pollfd.events = POLLIN;
 491                pollfd.revents = 0;
 492                err = poll(&pollfd, 1, -1);
 493                if (err < 0) {
 494                        mpsslog("%s %s poll failed %s\n",
 495                                mic->name, __func__, strerror(errno));
 496                        continue;
 497                }
 498
 499                if (pollfd.revents) {
 500                        if (desc->status != prev_status) {
 501                                mpsslog("%s %s Waiting... desc-> type %d "
 502                                        "status 0x%x\n",
 503                                        mic->name, __func__, type,
 504                                        desc->status);
 505                                prev_status = desc->status;
 506                        }
 507                        if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
 508                                mpsslog("%s %s poll.revents %d\n",
 509                                        mic->name, __func__, pollfd.revents);
 510                                mpsslog("%s %s desc-> type %d status 0x%x\n",
 511                                        mic->name, __func__, type,
 512                                        desc->status);
 513                                break;
 514                        }
 515                }
 516        }
 517        return 0;
 518}
 519
 520/* Spin till we have some descriptors */
 521static void
 522spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
 523{
 524        __u16 avail_idx = read_avail_idx(vr);
 525
 526        while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
 527#ifdef DEBUG
 528                mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
 529                        mic->name, __func__,
 530                        le16toh(vr->vr.avail->idx), vr->info->avail_idx);
 531#endif
 532                sched_yield();
 533        }
 534}
 535
 536static void *
 537virtio_net(void *arg)
 538{
 539        static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
 540        static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
 541        struct iovec vnet_iov[2][2] = {
 542                { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
 543                  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
 544                { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
 545                  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
 546        };
 547        struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
 548        struct mic_info *mic = (struct mic_info *)arg;
 549        char if_name[IFNAMSIZ];
 550        struct pollfd net_poll[MAX_NET_FD];
 551        struct mic_vring tx_vr, rx_vr;
 552        struct mic_copy_desc copy;
 553        struct mic_device_desc *desc;
 554        int err;
 555
 556        snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
 557        mic->mic_net.tap_fd = tun_alloc(mic, if_name);
 558        if (mic->mic_net.tap_fd < 0)
 559                goto done;
 560
 561        if (tap_configure(mic, if_name))
 562                goto done;
 563        mpsslog("MIC name %s id %d\n", mic->name, mic->id);
 564
 565        net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
 566        net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
 567        net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
 568        net_poll[NET_FD_TUN].events = POLLIN;
 569
 570        if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
 571                                  VIRTIO_ID_NET, &tx_vr, &rx_vr,
 572                virtnet_dev_page.dd.num_vq)) {
 573                mpsslog("%s init_vr failed %s\n",
 574                        mic->name, strerror(errno));
 575                goto done;
 576        }
 577
 578        copy.iovcnt = 2;
 579        desc = get_device_desc(mic, VIRTIO_ID_NET);
 580
 581        while (1) {
 582                ssize_t len;
 583
 584                net_poll[NET_FD_VIRTIO_NET].revents = 0;
 585                net_poll[NET_FD_TUN].revents = 0;
 586
 587                /* Start polling for data from tap and virtio net */
 588                err = poll(net_poll, 2, -1);
 589                if (err < 0) {
 590                        mpsslog("%s poll failed %s\n",
 591                                __func__, strerror(errno));
 592                        continue;
 593                }
 594                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 595                        err = wait_for_card_driver(mic,
 596                                                   mic->mic_net.virtio_net_fd,
 597                                                   VIRTIO_ID_NET);
 598                        if (err) {
 599                                mpsslog("%s %s %d Exiting...\n",
 600                                        mic->name, __func__, __LINE__);
 601                                break;
 602                        }
 603                }
 604                /*
 605                 * Check if there is data to be read from TUN and write to
 606                 * virtio net fd if there is.
 607                 */
 608                if (net_poll[NET_FD_TUN].revents & POLLIN) {
 609                        copy.iov = iov0;
 610                        len = readv(net_poll[NET_FD_TUN].fd,
 611                                copy.iov, copy.iovcnt);
 612                        if (len > 0) {
 613                                struct virtio_net_hdr *hdr
 614                                        = (struct virtio_net_hdr *)vnet_hdr[0];
 615
 616                                /* Disable checksums on the card since we are on
 617                                   a reliable PCIe link */
 618                                hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
 619#ifdef DEBUG
 620                                mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
 621                                        __func__, __LINE__, hdr->flags);
 622                                mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
 623                                        copy.out_len, hdr->gso_type);
 624#endif
 625#ifdef DEBUG
 626                                disp_iovec(mic, copy, __func__, __LINE__);
 627                                mpsslog("%s %s %d read from tap 0x%lx\n",
 628                                        mic->name, __func__, __LINE__,
 629                                        len);
 630#endif
 631                                spin_for_descriptors(mic, &tx_vr);
 632                                txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
 633                                             len);
 634
 635                                err = mic_virtio_copy(mic,
 636                                        mic->mic_net.virtio_net_fd, &tx_vr,
 637                                        &copy);
 638                                if (err < 0) {
 639                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 640                                                mic->name, __func__, __LINE__,
 641                                                strerror(errno));
 642                                }
 643                                if (!err)
 644                                        verify_out_len(mic, &copy);
 645#ifdef DEBUG
 646                                disp_iovec(mic, copy, __func__, __LINE__);
 647                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 648                                        mic->name, __func__, __LINE__,
 649                                        sum_iovec_len(&copy));
 650#endif
 651                                /* Reinitialize IOV for next run */
 652                                iov0[1].iov_len = MAX_NET_PKT_SIZE;
 653                        } else if (len < 0) {
 654                                disp_iovec(mic, &copy, __func__, __LINE__);
 655                                mpsslog("%s %s %d read failed %s ", mic->name,
 656                                        __func__, __LINE__, strerror(errno));
 657                                mpsslog("cnt %d sum %zd\n",
 658                                        copy.iovcnt, sum_iovec_len(&copy));
 659                        }
 660                }
 661
 662                /*
 663                 * Check if there is data to be read from virtio net and
 664                 * write to TUN if there is.
 665                 */
 666                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
 667                        while (rx_vr.info->avail_idx !=
 668                                le16toh(rx_vr.vr.avail->idx)) {
 669                                copy.iov = iov1;
 670                                txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
 671                                             MAX_NET_PKT_SIZE
 672                                        + sizeof(struct virtio_net_hdr));
 673
 674                                err = mic_virtio_copy(mic,
 675                                        mic->mic_net.virtio_net_fd, &rx_vr,
 676                                        &copy);
 677                                if (!err) {
 678#ifdef DEBUG
 679                                        struct virtio_net_hdr *hdr
 680                                                = (struct virtio_net_hdr *)
 681                                                        vnet_hdr[1];
 682
 683                                        mpsslog("%s %s %d hdr->flags 0x%x, ",
 684                                                mic->name, __func__, __LINE__,
 685                                                hdr->flags);
 686                                        mpsslog("out_len %d gso_type 0x%x\n",
 687                                                copy.out_len,
 688                                                hdr->gso_type);
 689#endif
 690                                        /* Set the correct output iov_len */
 691                                        iov1[1].iov_len = copy.out_len -
 692                                                sizeof(struct virtio_net_hdr);
 693                                        verify_out_len(mic, &copy);
 694#ifdef DEBUG
 695                                        disp_iovec(mic, copy, __func__,
 696                                                   __LINE__);
 697                                        mpsslog("%s %s %d ",
 698                                                mic->name, __func__, __LINE__);
 699                                        mpsslog("read from net 0x%lx\n",
 700                                                sum_iovec_len(copy));
 701#endif
 702                                        len = writev(net_poll[NET_FD_TUN].fd,
 703                                                copy.iov, copy.iovcnt);
 704                                        if (len != sum_iovec_len(&copy)) {
 705                                                mpsslog("Tun write failed %s ",
 706                                                        strerror(errno));
 707                                                mpsslog("len 0x%zx ", len);
 708                                                mpsslog("read_len 0x%zx\n",
 709                                                        sum_iovec_len(&copy));
 710                                        } else {
 711#ifdef DEBUG
 712                                                disp_iovec(mic, &copy, __func__,
 713                                                           __LINE__);
 714                                                mpsslog("%s %s %d ",
 715                                                        mic->name, __func__,
 716                                                        __LINE__);
 717                                                mpsslog("wrote to tap 0x%lx\n",
 718                                                        len);
 719#endif
 720                                        }
 721                                } else {
 722                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 723                                                mic->name, __func__, __LINE__,
 724                                                strerror(errno));
 725                                        break;
 726                                }
 727                        }
 728                }
 729                if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 730                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 731        }
 732done:
 733        pthread_exit(NULL);
 734}
 735
 736/* virtio_console */
 737#define VIRTIO_CONSOLE_FD 0
 738#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
 739#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
 740#define MAX_BUFFER_SIZE PAGE_SIZE
 741
 742static void *
 743virtio_console(void *arg)
 744{
 745        static __u8 vcons_buf[2][PAGE_SIZE];
 746        struct iovec vcons_iov[2] = {
 747                { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
 748                { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
 749        };
 750        struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
 751        struct mic_info *mic = (struct mic_info *)arg;
 752        int err;
 753        struct pollfd console_poll[MAX_CONSOLE_FD];
 754        int pty_fd;
 755        char *pts_name;
 756        ssize_t len;
 757        struct mic_vring tx_vr, rx_vr;
 758        struct mic_copy_desc copy;
 759        struct mic_device_desc *desc;
 760
 761        pty_fd = posix_openpt(O_RDWR);
 762        if (pty_fd < 0) {
 763                mpsslog("can't open a pseudoterminal master device: %s\n",
 764                        strerror(errno));
 765                goto _return;
 766        }
 767        pts_name = ptsname(pty_fd);
 768        if (pts_name == NULL) {
 769                mpsslog("can't get pts name\n");
 770                goto _close_pty;
 771        }
 772        printf("%s console message goes to %s\n", mic->name, pts_name);
 773        mpsslog("%s console message goes to %s\n", mic->name, pts_name);
 774        err = grantpt(pty_fd);
 775        if (err < 0) {
 776                mpsslog("can't grant access: %s %s\n",
 777                        pts_name, strerror(errno));
 778                goto _close_pty;
 779        }
 780        err = unlockpt(pty_fd);
 781        if (err < 0) {
 782                mpsslog("can't unlock a pseudoterminal: %s %s\n",
 783                        pts_name, strerror(errno));
 784                goto _close_pty;
 785        }
 786        console_poll[MONITOR_FD].fd = pty_fd;
 787        console_poll[MONITOR_FD].events = POLLIN;
 788
 789        console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
 790        console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
 791
 792        if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
 793                                  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
 794                virtcons_dev_page.dd.num_vq)) {
 795                mpsslog("%s init_vr failed %s\n",
 796                        mic->name, strerror(errno));
 797                goto _close_pty;
 798        }
 799
 800        copy.iovcnt = 1;
 801        desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
 802
 803        for (;;) {
 804                console_poll[MONITOR_FD].revents = 0;
 805                console_poll[VIRTIO_CONSOLE_FD].revents = 0;
 806                err = poll(console_poll, MAX_CONSOLE_FD, -1);
 807                if (err < 0) {
 808                        mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
 809                                strerror(errno));
 810                        continue;
 811                }
 812                if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
 813                        err = wait_for_card_driver(mic,
 814                                        mic->mic_console.virtio_console_fd,
 815                                        VIRTIO_ID_CONSOLE);
 816                        if (err) {
 817                                mpsslog("%s %s %d Exiting...\n",
 818                                        mic->name, __func__, __LINE__);
 819                                break;
 820                        }
 821                }
 822
 823                if (console_poll[MONITOR_FD].revents & POLLIN) {
 824                        copy.iov = iov0;
 825                        len = readv(pty_fd, copy.iov, copy.iovcnt);
 826                        if (len > 0) {
 827#ifdef DEBUG
 828                                disp_iovec(mic, copy, __func__, __LINE__);
 829                                mpsslog("%s %s %d read from tap 0x%lx\n",
 830                                        mic->name, __func__, __LINE__,
 831                                        len);
 832#endif
 833                                spin_for_descriptors(mic, &tx_vr);
 834                                txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
 835                                             &copy, len);
 836
 837                                err = mic_virtio_copy(mic,
 838                                        mic->mic_console.virtio_console_fd,
 839                                        &tx_vr, &copy);
 840                                if (err < 0) {
 841                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 842                                                mic->name, __func__, __LINE__,
 843                                                strerror(errno));
 844                                }
 845                                if (!err)
 846                                        verify_out_len(mic, &copy);
 847#ifdef DEBUG
 848                                disp_iovec(mic, copy, __func__, __LINE__);
 849                                mpsslog("%s %s %d wrote to net 0x%lx\n",
 850                                        mic->name, __func__, __LINE__,
 851                                        sum_iovec_len(copy));
 852#endif
 853                                /* Reinitialize IOV for next run */
 854                                iov0->iov_len = PAGE_SIZE;
 855                        } else if (len < 0) {
 856                                disp_iovec(mic, &copy, __func__, __LINE__);
 857                                mpsslog("%s %s %d read failed %s ",
 858                                        mic->name, __func__, __LINE__,
 859                                        strerror(errno));
 860                                mpsslog("cnt %d sum %zd\n",
 861                                        copy.iovcnt, sum_iovec_len(&copy));
 862                        }
 863                }
 864
 865                if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
 866                        while (rx_vr.info->avail_idx !=
 867                                le16toh(rx_vr.vr.avail->idx)) {
 868                                copy.iov = iov1;
 869                                txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
 870                                             &copy, PAGE_SIZE);
 871
 872                                err = mic_virtio_copy(mic,
 873                                        mic->mic_console.virtio_console_fd,
 874                                        &rx_vr, &copy);
 875                                if (!err) {
 876                                        /* Set the correct output iov_len */
 877                                        iov1->iov_len = copy.out_len;
 878                                        verify_out_len(mic, &copy);
 879#ifdef DEBUG
 880                                        disp_iovec(mic, copy, __func__,
 881                                                   __LINE__);
 882                                        mpsslog("%s %s %d ",
 883                                                mic->name, __func__, __LINE__);
 884                                        mpsslog("read from net 0x%lx\n",
 885                                                sum_iovec_len(copy));
 886#endif
 887                                        len = writev(pty_fd,
 888                                                copy.iov, copy.iovcnt);
 889                                        if (len != sum_iovec_len(&copy)) {
 890                                                mpsslog("Tun write failed %s ",
 891                                                        strerror(errno));
 892                                                mpsslog("len 0x%zx ", len);
 893                                                mpsslog("read_len 0x%zx\n",
 894                                                        sum_iovec_len(&copy));
 895                                        } else {
 896#ifdef DEBUG
 897                                                disp_iovec(mic, copy, __func__,
 898                                                           __LINE__);
 899                                                mpsslog("%s %s %d ",
 900                                                        mic->name, __func__,
 901                                                        __LINE__);
 902                                                mpsslog("wrote to tap 0x%lx\n",
 903                                                        len);
 904#endif
 905                                        }
 906                                } else {
 907                                        mpsslog("%s %s %d mic_virtio_copy %s\n",
 908                                                mic->name, __func__, __LINE__,
 909                                                strerror(errno));
 910                                        break;
 911                                }
 912                        }
 913                }
 914                if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
 915                        mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
 916        }
 917_close_pty:
 918        close(pty_fd);
 919_return:
 920        pthread_exit(NULL);
 921}
 922
 923static void
 924add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
 925{
 926        char path[PATH_MAX];
 927        int fd, err;
 928
 929        snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
 930        fd = open(path, O_RDWR);
 931        if (fd < 0) {
 932                mpsslog("Could not open %s %s\n", path, strerror(errno));
 933                return;
 934        }
 935
 936        err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
 937        if (err < 0) {
 938                mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
 939                close(fd);
 940                return;
 941        }
 942        switch (dd->type) {
 943        case VIRTIO_ID_NET:
 944                mic->mic_net.virtio_net_fd = fd;
 945                mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
 946                break;
 947        case VIRTIO_ID_CONSOLE:
 948                mic->mic_console.virtio_console_fd = fd;
 949                mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
 950                break;
 951        case VIRTIO_ID_BLOCK:
 952                mic->mic_virtblk.virtio_block_fd = fd;
 953                mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
 954                break;
 955        }
 956}
 957
 958static bool
 959set_backend_file(struct mic_info *mic)
 960{
 961        FILE *config;
 962        char buff[PATH_MAX], *line, *evv, *p;
 963
 964        snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
 965        config = fopen(buff, "r");
 966        if (config == NULL)
 967                return false;
 968        do {  /* look for "virtblk_backend=XXXX" */
 969                line = fgets(buff, PATH_MAX, config);
 970                if (line == NULL)
 971                        break;
 972                if (*line == '#')
 973                        continue;
 974                p = strchr(line, '\n');
 975                if (p)
 976                        *p = '\0';
 977        } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
 978        fclose(config);
 979        if (line == NULL)
 980                return false;
 981        evv = strchr(line, '=');
 982        if (evv == NULL)
 983                return false;
 984        mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
 985        if (mic->mic_virtblk.backend_file == NULL) {
 986                mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
 987                return false;
 988        }
 989        strcpy(mic->mic_virtblk.backend_file, evv + 1);
 990        return true;
 991}
 992
 993#define SECTOR_SIZE 512
 994static bool
 995set_backend_size(struct mic_info *mic)
 996{
 997        mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
 998                SEEK_END);
 999        if (mic->mic_virtblk.backend_size < 0) {
1000                mpsslog("%s: can't seek: %s\n",
1001                        mic->name, mic->mic_virtblk.backend_file);
1002                return false;
1003        }
1004        virtblk_dev_page.blk_config.capacity =
1005                mic->mic_virtblk.backend_size / SECTOR_SIZE;
1006        if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
1007                virtblk_dev_page.blk_config.capacity++;
1008
1009        virtblk_dev_page.blk_config.capacity =
1010                htole64(virtblk_dev_page.blk_config.capacity);
1011
1012        return true;
1013}
1014
1015static bool
1016open_backend(struct mic_info *mic)
1017{
1018        if (!set_backend_file(mic))
1019                goto _error_exit;
1020        mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1021        if (mic->mic_virtblk.backend < 0) {
1022                mpsslog("%s: can't open: %s\n", mic->name,
1023                        mic->mic_virtblk.backend_file);
1024                goto _error_free;
1025        }
1026        if (!set_backend_size(mic))
1027                goto _error_close;
1028        mic->mic_virtblk.backend_addr = mmap(NULL,
1029                mic->mic_virtblk.backend_size,
1030                PROT_READ|PROT_WRITE, MAP_SHARED,
1031                mic->mic_virtblk.backend, 0L);
1032        if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1033                mpsslog("%s: can't map: %s %s\n",
1034                        mic->name, mic->mic_virtblk.backend_file,
1035                        strerror(errno));
1036                goto _error_close;
1037        }
1038        return true;
1039
1040 _error_close:
1041        close(mic->mic_virtblk.backend);
1042 _error_free:
1043        free(mic->mic_virtblk.backend_file);
1044 _error_exit:
1045        return false;
1046}
1047
1048static void
1049close_backend(struct mic_info *mic)
1050{
1051        munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1052        close(mic->mic_virtblk.backend);
1053        free(mic->mic_virtblk.backend_file);
1054}
1055
1056static bool
1057start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1058{
1059        if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1060                mpsslog("%s: blk_config is not 8 byte aligned.\n",
1061                        mic->name);
1062                return false;
1063        }
1064        add_virtio_device(mic, &virtblk_dev_page.dd);
1065        if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1066                                  VIRTIO_ID_BLOCK, vring, NULL,
1067                                  virtblk_dev_page.dd.num_vq)) {
1068                mpsslog("%s init_vr failed %s\n",
1069                        mic->name, strerror(errno));
1070                return false;
1071        }
1072        return true;
1073}
1074
1075static void
1076stop_virtblk(struct mic_info *mic)
1077{
1078        int vr_size, ret;
1079
1080        vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1081                                         MIC_VIRTIO_RING_ALIGN) +
1082                             sizeof(struct _mic_vring_info));
1083        ret = munmap(mic->mic_virtblk.block_dp,
1084                MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1085        if (ret < 0)
1086                mpsslog("%s munmap errno %d\n", mic->name, errno);
1087        close(mic->mic_virtblk.virtio_block_fd);
1088}
1089
1090static __u8
1091header_error_check(struct vring_desc *desc)
1092{
1093        if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1094                mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1095                        __func__, __LINE__);
1096                return -EIO;
1097        }
1098        if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1099                mpsslog("%s() %d: alone\n",
1100                        __func__, __LINE__);
1101                return -EIO;
1102        }
1103        if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1104                mpsslog("%s() %d: not read\n",
1105                        __func__, __LINE__);
1106                return -EIO;
1107        }
1108        return 0;
1109}
1110
1111static int
1112read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1113{
1114        struct iovec iovec;
1115        struct mic_copy_desc copy;
1116
1117        iovec.iov_len = sizeof(*hdr);
1118        iovec.iov_base = hdr;
1119        copy.iov = &iovec;
1120        copy.iovcnt = 1;
1121        copy.vr_idx = 0;  /* only one vring on virtio_block */
1122        copy.update_used = false;  /* do not update used index */
1123        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1124}
1125
1126static int
1127transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1128{
1129        struct mic_copy_desc copy;
1130
1131        copy.iov = iovec;
1132        copy.iovcnt = iovcnt;
1133        copy.vr_idx = 0;  /* only one vring on virtio_block */
1134        copy.update_used = false;  /* do not update used index */
1135        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1136}
1137
1138static __u8
1139status_error_check(struct vring_desc *desc)
1140{
1141        if (le32toh(desc->len) != sizeof(__u8)) {
1142                mpsslog("%s() %d: length is not sizeof(status)\n",
1143                        __func__, __LINE__);
1144                return -EIO;
1145        }
1146        return 0;
1147}
1148
1149static int
1150write_status(int fd, __u8 *status)
1151{
1152        struct iovec iovec;
1153        struct mic_copy_desc copy;
1154
1155        iovec.iov_base = status;
1156        iovec.iov_len = sizeof(*status);
1157        copy.iov = &iovec;
1158        copy.iovcnt = 1;
1159        copy.vr_idx = 0;  /* only one vring on virtio_block */
1160        copy.update_used = true; /* Update used index */
1161        return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1162}
1163
1164#ifndef VIRTIO_BLK_T_GET_ID
1165#define VIRTIO_BLK_T_GET_ID    8
1166#endif
1167
1168static void *
1169virtio_block(void *arg)
1170{
1171        struct mic_info *mic = (struct mic_info *)arg;
1172        int ret;
1173        struct pollfd block_poll;
1174        struct mic_vring vring;
1175        __u16 avail_idx;
1176        __u32 desc_idx;
1177        struct vring_desc *desc;
1178        struct iovec *iovec, *piov;
1179        __u8 status;
1180        __u32 buffer_desc_idx;
1181        struct virtio_blk_outhdr hdr;
1182        void *fos;
1183
1184        for (;;) {  /* forever */
1185                if (!open_backend(mic)) { /* No virtblk */
1186                        for (mic->mic_virtblk.signaled = 0;
1187                                !mic->mic_virtblk.signaled;)
1188                                sleep(1);
1189                        continue;
1190                }
1191
1192                /* backend file is specified. */
1193                if (!start_virtblk(mic, &vring))
1194                        goto _close_backend;
1195                iovec = malloc(sizeof(*iovec) *
1196                        le32toh(virtblk_dev_page.blk_config.seg_max));
1197                if (!iovec) {
1198                        mpsslog("%s: can't alloc iovec: %s\n",
1199                                mic->name, strerror(ENOMEM));
1200                        goto _stop_virtblk;
1201                }
1202
1203                block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1204                block_poll.events = POLLIN;
1205                for (mic->mic_virtblk.signaled = 0;
1206                     !mic->mic_virtblk.signaled;) {
1207                        block_poll.revents = 0;
1208                                        /* timeout in 1 sec to see signaled */
1209                        ret = poll(&block_poll, 1, 1000);
1210                        if (ret < 0) {
1211                                mpsslog("%s %d: poll failed: %s\n",
1212                                        __func__, __LINE__,
1213                                        strerror(errno));
1214                                continue;
1215                        }
1216
1217                        if (!(block_poll.revents & POLLIN)) {
1218#ifdef DEBUG
1219                                mpsslog("%s %d: block_poll.revents=0x%x\n",
1220                                        __func__, __LINE__, block_poll.revents);
1221#endif
1222                                continue;
1223                        }
1224
1225                        /* POLLIN */
1226                        while (vring.info->avail_idx !=
1227                                le16toh(vring.vr.avail->idx)) {
1228                                /* read header element */
1229                                avail_idx =
1230                                        vring.info->avail_idx &
1231                                        (vring.vr.num - 1);
1232                                desc_idx = le16toh(
1233                                        vring.vr.avail->ring[avail_idx]);
1234                                desc = &vring.vr.desc[desc_idx];
1235#ifdef DEBUG
1236                                mpsslog("%s() %d: avail_idx=%d ",
1237                                        __func__, __LINE__,
1238                                        vring.info->avail_idx);
1239                                mpsslog("vring.vr.num=%d desc=%p\n",
1240                                        vring.vr.num, desc);
1241#endif
1242                                status = header_error_check(desc);
1243                                ret = read_header(
1244                                        mic->mic_virtblk.virtio_block_fd,
1245                                        &hdr, desc_idx);
1246                                if (ret < 0) {
1247                                        mpsslog("%s() %d %s: ret=%d %s\n",
1248                                                __func__, __LINE__,
1249                                                mic->name, ret,
1250                                                strerror(errno));
1251                                        break;
1252                                }
1253                                /* buffer element */
1254                                piov = iovec;
1255                                status = 0;
1256                                fos = mic->mic_virtblk.backend_addr +
1257                                        (hdr.sector * SECTOR_SIZE);
1258                                buffer_desc_idx = next_desc(desc);
1259                                desc_idx = buffer_desc_idx;
1260                                for (desc = &vring.vr.desc[buffer_desc_idx];
1261                                     desc->flags & VRING_DESC_F_NEXT;
1262                                     desc_idx = next_desc(desc),
1263                                             desc = &vring.vr.desc[desc_idx]) {
1264                                        piov->iov_len = desc->len;
1265                                        piov->iov_base = fos;
1266                                        piov++;
1267                                        fos += desc->len;
1268                                }
1269                                /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1270                                if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1271                                        VIRTIO_BLK_T_GET_ID)) {
1272                                        /*
1273                                          VIRTIO_BLK_T_IN - does not do
1274                                          anything. Probably for documenting.
1275                                          VIRTIO_BLK_T_SCSI_CMD - for
1276                                          virtio_scsi.
1277                                          VIRTIO_BLK_T_FLUSH - turned off in
1278                                          config space.
1279                                          VIRTIO_BLK_T_BARRIER - defined but not
1280                                          used in anywhere.
1281                                        */
1282                                        mpsslog("%s() %d: type %x ",
1283                                                __func__, __LINE__,
1284                                                hdr.type);
1285                                        mpsslog("is not supported\n");
1286                                        status = -ENOTSUP;
1287
1288                                } else {
1289                                        ret = transfer_blocks(
1290                                        mic->mic_virtblk.virtio_block_fd,
1291                                                iovec,
1292                                                piov - iovec);
1293                                        if (ret < 0 &&
1294                                            status != 0)
1295                                                status = ret;
1296                                }
1297                                /* write status and update used pointer */
1298                                if (status != 0)
1299                                        status = status_error_check(desc);
1300                                ret = write_status(
1301                                        mic->mic_virtblk.virtio_block_fd,
1302                                        &status);
1303#ifdef DEBUG
1304                                mpsslog("%s() %d: write status=%d on desc=%p\n",
1305                                        __func__, __LINE__,
1306                                        status, desc);
1307#endif
1308                        }
1309                }
1310                free(iovec);
1311_stop_virtblk:
1312                stop_virtblk(mic);
1313_close_backend:
1314                close_backend(mic);
1315        }  /* forever */
1316
1317        pthread_exit(NULL);
1318}
1319
1320static void
1321reset(struct mic_info *mic)
1322{
1323#define RESET_TIMEOUT 120
1324        int i = RESET_TIMEOUT;
1325        setsysfs(mic->name, "state", "reset");
1326        while (i) {
1327                char *state;
1328                state = readsysfs(mic->name, "state");
1329                if (!state)
1330                        goto retry;
1331                mpsslog("%s: %s %d state %s\n",
1332                        mic->name, __func__, __LINE__, state);
1333
1334                if (!strcmp(state, "ready")) {
1335                        free(state);
1336                        break;
1337                }
1338                free(state);
1339retry:
1340                sleep(1);
1341                i--;
1342        }
1343}
1344
1345static int
1346get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1347{
1348        if (!strcmp(shutdown_status, "nop"))
1349                return MIC_NOP;
1350        if (!strcmp(shutdown_status, "crashed"))
1351                return MIC_CRASHED;
1352        if (!strcmp(shutdown_status, "halted"))
1353                return MIC_HALTED;
1354        if (!strcmp(shutdown_status, "poweroff"))
1355                return MIC_POWER_OFF;
1356        if (!strcmp(shutdown_status, "restart"))
1357                return MIC_RESTART;
1358        mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1359        /* Invalid state */
1360        assert(0);
1361};
1362
1363static int get_mic_state(struct mic_info *mic)
1364{
1365        char *state = NULL;
1366        enum mic_states mic_state;
1367
1368        while (!state) {
1369                state = readsysfs(mic->name, "state");
1370                sleep(1);
1371        }
1372        mpsslog("%s: %s %d state %s\n",
1373                mic->name, __func__, __LINE__, state);
1374
1375        if (!strcmp(state, "ready")) {
1376                mic_state = MIC_READY;
1377        } else if (!strcmp(state, "booting")) {
1378                mic_state = MIC_BOOTING;
1379        } else if (!strcmp(state, "online")) {
1380                mic_state = MIC_ONLINE;
1381        } else if (!strcmp(state, "shutting_down")) {
1382                mic_state = MIC_SHUTTING_DOWN;
1383        } else if (!strcmp(state, "reset_failed")) {
1384                mic_state = MIC_RESET_FAILED;
1385        } else if (!strcmp(state, "resetting")) {
1386                mic_state = MIC_RESETTING;
1387        } else {
1388                mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1389                assert(0);
1390        }
1391
1392        free(state);
1393        return mic_state;
1394};
1395
1396static void mic_handle_shutdown(struct mic_info *mic)
1397{
1398#define SHUTDOWN_TIMEOUT 60
1399        int i = SHUTDOWN_TIMEOUT;
1400        char *shutdown_status;
1401        while (i) {
1402                shutdown_status = readsysfs(mic->name, "shutdown_status");
1403                if (!shutdown_status) {
1404                        sleep(1);
1405                        continue;
1406                }
1407                mpsslog("%s: %s %d shutdown_status %s\n",
1408                        mic->name, __func__, __LINE__, shutdown_status);
1409                switch (get_mic_shutdown_status(mic, shutdown_status)) {
1410                case MIC_RESTART:
1411                        mic->restart = 1;
1412                case MIC_HALTED:
1413                case MIC_POWER_OFF:
1414                case MIC_CRASHED:
1415                        free(shutdown_status);
1416                        goto reset;
1417                default:
1418                        break;
1419                }
1420                free(shutdown_status);
1421                sleep(1);
1422                i--;
1423        }
1424reset:
1425        if (!i)
1426                mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1427                        mic->name, __func__, __LINE__, shutdown_status);
1428        reset(mic);
1429}
1430
1431static int open_state_fd(struct mic_info *mic)
1432{
1433        char pathname[PATH_MAX];
1434        int fd;
1435
1436        snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1437                 MICSYSFSDIR, mic->name, "state");
1438
1439        fd = open(pathname, O_RDONLY);
1440        if (fd < 0)
1441                mpsslog("%s: opening file %s failed %s\n",
1442                        mic->name, pathname, strerror(errno));
1443        return fd;
1444}
1445
1446static int block_till_state_change(int fd, struct mic_info *mic)
1447{
1448        struct pollfd ufds[1];
1449        char value[PAGE_SIZE];
1450        int ret;
1451
1452        ufds[0].fd = fd;
1453        ufds[0].events = POLLERR | POLLPRI;
1454        ret = poll(ufds, 1, -1);
1455        if (ret < 0) {
1456                mpsslog("%s: %s %d poll failed %s\n",
1457                        mic->name, __func__, __LINE__, strerror(errno));
1458                return ret;
1459        }
1460
1461        ret = lseek(fd, 0, SEEK_SET);
1462        if (ret < 0) {
1463                mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1464                        mic->name, __func__, __LINE__, strerror(errno));
1465                return ret;
1466        }
1467
1468        ret = read(fd, value, sizeof(value));
1469        if (ret < 0) {
1470                mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1471                        mic->name, __func__, __LINE__, strerror(errno));
1472                return ret;
1473        }
1474
1475        return 0;
1476}
1477
1478static void *
1479mic_config(void *arg)
1480{
1481        struct mic_info *mic = (struct mic_info *)arg;
1482        int fd, ret, stat = 0;
1483
1484        fd = open_state_fd(mic);
1485        if (fd < 0) {
1486                mpsslog("%s: %s %d open state fd failed %s\n",
1487                        mic->name, __func__, __LINE__, strerror(errno));
1488                goto exit;
1489        }
1490
1491        do {
1492                ret = block_till_state_change(fd, mic);
1493                if (ret < 0) {
1494                        mpsslog("%s: %s %d block_till_state_change error %s\n",
1495                                mic->name, __func__, __LINE__, strerror(errno));
1496                        goto close_exit;
1497                }
1498
1499                switch (get_mic_state(mic)) {
1500                case MIC_SHUTTING_DOWN:
1501                        mic_handle_shutdown(mic);
1502                        break;
1503                case MIC_READY:
1504                case MIC_RESET_FAILED:
1505                        ret = kill(mic->pid, SIGTERM);
1506                        mpsslog("%s: %s %d kill pid %d ret %d\n",
1507                                mic->name, __func__, __LINE__,
1508                                mic->pid, ret);
1509                        if (!ret) {
1510                                ret = waitpid(mic->pid, &stat,
1511                                              WIFSIGNALED(stat));
1512                                mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1513                                        mic->name, __func__, __LINE__,
1514                                        ret, mic->pid);
1515                        }
1516                        if (mic->boot_on_resume) {
1517                                setsysfs(mic->name, "state", "boot");
1518                                mic->boot_on_resume = 0;
1519                        }
1520                        goto close_exit;
1521                default:
1522                        break;
1523                }
1524        } while (1);
1525
1526close_exit:
1527        close(fd);
1528exit:
1529        init_mic(mic);
1530        pthread_exit(NULL);
1531}
1532
1533static void
1534set_cmdline(struct mic_info *mic)
1535{
1536        char buffer[PATH_MAX];
1537        int len;
1538
1539        len = snprintf(buffer, PATH_MAX,
1540                "clocksource=tsc highres=off nohz=off ");
1541        len += snprintf(buffer + len, PATH_MAX,
1542                "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1543        len += snprintf(buffer + len, PATH_MAX,
1544                "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1545                mic->id + 1);
1546
1547        setsysfs(mic->name, "cmdline", buffer);
1548        mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1549        snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1550        mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1551}
1552
1553static void
1554set_log_buf_info(struct mic_info *mic)
1555{
1556        int fd;
1557        off_t len;
1558        char system_map[] = "/lib/firmware/mic/System.map";
1559        char *map, *temp, log_buf[17] = {'\0'};
1560
1561        fd = open(system_map, O_RDONLY);
1562        if (fd < 0) {
1563                mpsslog("%s: Opening System.map failed: %d\n",
1564                        mic->name, errno);
1565                return;
1566        }
1567        len = lseek(fd, 0, SEEK_END);
1568        if (len < 0) {
1569                mpsslog("%s: Reading System.map size failed: %d\n",
1570                        mic->name, errno);
1571                close(fd);
1572                return;
1573        }
1574        map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1575        if (map == MAP_FAILED) {
1576                mpsslog("%s: mmap of System.map failed: %d\n",
1577                        mic->name, errno);
1578                close(fd);
1579                return;
1580        }
1581        temp = strstr(map, "__log_buf");
1582        if (!temp) {
1583                mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1584                munmap(map, len);
1585                close(fd);
1586                return;
1587        }
1588        strncpy(log_buf, temp - 19, 16);
1589        setsysfs(mic->name, "log_buf_addr", log_buf);
1590        mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1591        temp = strstr(map, "log_buf_len");
1592        if (!temp) {
1593                mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1594                munmap(map, len);
1595                close(fd);
1596                return;
1597        }
1598        strncpy(log_buf, temp - 19, 16);
1599        setsysfs(mic->name, "log_buf_len", log_buf);
1600        mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1601        munmap(map, len);
1602        close(fd);
1603}
1604
1605static void
1606change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1607{
1608        struct mic_info *mic;
1609
1610        for (mic = mic_list.next; mic != NULL; mic = mic->next)
1611                mic->mic_virtblk.signaled = 1/* true */;
1612}
1613
/*
 * Set up the boot parameters of one MIC card: the log buffer information
 * first, then the command line.
 */
static void
set_mic_boot_params(struct mic_info *mic)
{
        set_log_buf_info(mic);
        set_cmdline(mic);
}
1620
1621static void *
1622init_mic(void *arg)
1623{
1624        struct mic_info *mic = (struct mic_info *)arg;
1625        struct sigaction ignore = {
1626                .sa_flags = 0,
1627                .sa_handler = SIG_IGN
1628        };
1629        struct sigaction act = {
1630                .sa_flags = SA_SIGINFO,
1631                .sa_sigaction = change_virtblk_backend,
1632        };
1633        char buffer[PATH_MAX];
1634        int err, fd;
1635
1636        /*
1637         * Currently, one virtio block device is supported for each MIC card
1638         * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1639         * The signal informs the virtio block backend about a change in the
1640         * configuration file which specifies the virtio backend file name on
1641         * the host. Virtio block backend then re-reads the configuration file
1642         * and switches to the new block device. This signalling mechanism may
1643         * not be required once multiple virtio block devices are supported by
1644         * the MIC daemon.
1645         */
1646        sigaction(SIGUSR1, &ignore, NULL);
1647retry:
1648        fd = open_state_fd(mic);
1649        if (fd < 0) {
1650                mpsslog("%s: %s %d open state fd failed %s\n",
1651                        mic->name, __func__, __LINE__, strerror(errno));
1652                sleep(2);
1653                goto retry;
1654        }
1655
1656        if (mic->restart) {
1657                snprintf(buffer, PATH_MAX, "boot");
1658                setsysfs(mic->name, "state", buffer);
1659                mpsslog("%s restarting mic %d\n",
1660                        mic->name, mic->restart);
1661                mic->restart = 0;
1662        }
1663
1664        while (1) {
1665                while (block_till_state_change(fd, mic)) {
1666                        mpsslog("%s: %s %d block_till_state_change error %s\n",
1667                                mic->name, __func__, __LINE__, strerror(errno));
1668                        sleep(2);
1669                        continue;
1670                }
1671
1672                if (get_mic_state(mic) == MIC_BOOTING)
1673                        break;
1674        }
1675
1676        mic->pid = fork();
1677        switch (mic->pid) {
1678        case 0:
1679                add_virtio_device(mic, &virtcons_dev_page.dd);
1680                add_virtio_device(mic, &virtnet_dev_page.dd);
1681                err = pthread_create(&mic->mic_console.console_thread, NULL,
1682                        virtio_console, mic);
1683                if (err)
1684                        mpsslog("%s virtcons pthread_create failed %s\n",
1685                                mic->name, strerror(err));
1686                err = pthread_create(&mic->mic_net.net_thread, NULL,
1687                        virtio_net, mic);
1688                if (err)
1689                        mpsslog("%s virtnet pthread_create failed %s\n",
1690                                mic->name, strerror(err));
1691                err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1692                        virtio_block, mic);
1693                if (err)
1694                        mpsslog("%s virtblk pthread_create failed %s\n",
1695                                mic->name, strerror(err));
1696                sigemptyset(&act.sa_mask);
1697                err = sigaction(SIGUSR1, &act, NULL);
1698                if (err)
1699                        mpsslog("%s sigaction SIGUSR1 failed %s\n",
1700                                mic->name, strerror(errno));
1701                while (1)
1702                        sleep(60);
1703        case -1:
1704                mpsslog("fork failed MIC name %s id %d errno %d\n",
1705                        mic->name, mic->id, errno);
1706                break;
1707        default:
1708                err = pthread_create(&mic->config_thread, NULL,
1709                                     mic_config, mic);
1710                if (err)
1711                        mpsslog("%s mic_config pthread_create failed %s\n",
1712                                mic->name, strerror(err));
1713        }
1714
1715        return NULL;
1716}
1717
1718static void
1719start_daemon(void)
1720{
1721        struct mic_info *mic;
1722        int err;
1723
1724        for (mic = mic_list.next; mic; mic = mic->next) {
1725                set_mic_boot_params(mic);
1726                err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1727                if (err)
1728                        mpsslog("%s init_mic pthread_create failed %s\n",
1729                                mic->name, strerror(err));
1730        }
1731
1732        while (1)
1733                sleep(60);
1734}
1735
1736static int
1737init_mic_list(void)
1738{
1739        struct mic_info *mic = &mic_list;
1740        struct dirent *file;
1741        DIR *dp;
1742        int cnt = 0;
1743
1744        dp = opendir(MICSYSFSDIR);
1745        if (!dp)
1746                return 0;
1747
1748        while ((file = readdir(dp)) != NULL) {
1749                if (!strncmp(file->d_name, "mic", 3)) {
1750                        mic->next = calloc(1, sizeof(struct mic_info));
1751                        if (mic->next) {
1752                                mic = mic->next;
1753                                mic->id = atoi(&file->d_name[3]);
1754                                mic->name = malloc(strlen(file->d_name) + 16);
1755                                if (mic->name)
1756                                        strcpy(mic->name, file->d_name);
1757                                mpsslog("MIC name %s id %d\n", mic->name,
1758                                        mic->id);
1759                                cnt++;
1760                        }
1761                }
1762        }
1763
1764        closedir(dp);
1765        return cnt;
1766}
1767
1768void
1769mpsslog(char *format, ...)
1770{
1771        va_list args;
1772        char buffer[4096];
1773        char ts[52], *ts1;
1774        time_t t;
1775
1776        if (logfp == NULL)
1777                return;
1778
1779        va_start(args, format);
1780        vsprintf(buffer, format, args);
1781        va_end(args);
1782
1783        time(&t);
1784        ts1 = ctime_r(&t, ts);
1785        ts1[strlen(ts1) - 1] = '\0';
1786        fprintf(logfp, "%s: %s", ts1, buffer);
1787
1788        fflush(logfp);
1789}
1790
1791int
1792main(int argc, char *argv[])
1793{
1794        int cnt;
1795        pid_t pid;
1796
1797        myname = argv[0];
1798
1799        logfp = fopen(LOGFILE_NAME, "a+");
1800        if (!logfp) {
1801                fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1802                exit(1);
1803        }
1804        pid = fork();
1805        switch (pid) {
1806        case 0:
1807                break;
1808        case -1:
1809                exit(2);
1810        default:
1811                exit(0);
1812        }
1813
1814        mpsslog("MIC Daemon start\n");
1815
1816        cnt = init_mic_list();
1817        if (cnt == 0) {
1818                mpsslog("MIC module not loaded\n");
1819                exit(3);
1820        }
1821        mpsslog("MIC found %d devices\n", cnt);
1822
1823        start_daemon();
1824
1825        exit(0);
1826}
1827