linux/drivers/hv/channel_mgmt.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

const struct vmbus_device vmbus_devs[] = {
        /* IDE */
        { .dev_type = HV_IDE,
          HV_IDE_GUID,
          .perf_device = true,
          .allowed_in_isolated = false,
        },

        /* SCSI */
        { .dev_type = HV_SCSI,
          HV_SCSI_GUID,
          .perf_device = true,
          .allowed_in_isolated = true,
        },

        /* Fibre Channel */
        { .dev_type = HV_FC,
          HV_SYNTHFC_GUID,
          .perf_device = true,
          .allowed_in_isolated = false,
        },

        /* Synthetic NIC */
        { .dev_type = HV_NIC,
          HV_NIC_GUID,
          .perf_device = true,
          .allowed_in_isolated = true,
        },

        /* Network Direct */
        { .dev_type = HV_ND,
          HV_ND_GUID,
          .perf_device = true,
          .allowed_in_isolated = false,
        },

        /* PCIE */
        { .dev_type = HV_PCIE,
          HV_PCIE_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Synthetic Frame Buffer */
        { .dev_type = HV_FB,
          HV_SYNTHVID_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Synthetic Keyboard */
        { .dev_type = HV_KBD,
          HV_KBD_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Synthetic Mouse */
        { .dev_type = HV_MOUSE,
          HV_MOUSE_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* KVP */
        { .dev_type = HV_KVP,
          HV_KVP_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Time Synch */
        { .dev_type = HV_TS,
          HV_TS_GUID,
          .perf_device = false,
          .allowed_in_isolated = true,
        },

        /* Heartbeat */
        { .dev_type = HV_HB,
          HV_HEART_BEAT_GUID,
          .perf_device = false,
          .allowed_in_isolated = true,
        },

        /* Shutdown */
        { .dev_type = HV_SHUTDOWN,
          HV_SHUTDOWN_GUID,
          .perf_device = false,
          .allowed_in_isolated = true,
        },

        /* File copy */
        { .dev_type = HV_FCOPY,
          HV_FCOPY_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Backup */
        { .dev_type = HV_BACKUP,
          HV_VSS_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Dynamic Memory */
        { .dev_type = HV_DM,
          HV_DM_GUID,
          .perf_device = false,
          .allowed_in_isolated = false,
        },

        /* Unknown GUID */
        { .dev_type = HV_UNKNOWN,
          .perf_device = false,
          .allowed_in_isolated = false,
        },
};

static const struct {
        guid_t guid;
} vmbus_unsupported_devs[] = {
        { HV_AVMA1_GUID },
        { HV_AVMA2_GUID },
        { HV_RDV_GUID   },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
        struct vmbus_channel_msginfo *msginfo;
        unsigned long flags;

        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
        channel->rescind = true;
        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {

                if (msginfo->waiting_channel == channel) {
                        complete(&msginfo->waitevent);
                        break;
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
                if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
                        return true;
        return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
        const guid_t *guid = &channel->offermsg.offer.if_type;
        u16 i;

        if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
                return HV_UNKNOWN;

        for (i = HV_IDE; i < HV_UNKNOWN; i++) {
                if (guid_equal(guid, &vmbus_devs[i].guid))
                        return i;
        }
        pr_info("Unknown GUID: %pUl\n", guid);
        return i;
}
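
/*
 * The index returned by hv_get_dev_type() is stored in channel->device_id
 * by vmbus_setup_channel_state() below, so later code can consult the
 * vmbus_devs[] table directly. A minimal sketch of such a lookup (the
 * hv_is_perf_channel() helper in hyperv_vmbus.h does something along these
 * lines):
 *
 *        bool is_perf = vmbus_devs[channel->device_id].perf_device;
 */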

/**
 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @buflen: Length of the raw buffer channel data.
 * @fw_version: The framework versions we can support.
 * @fw_vercnt: The size of @fw_version.
 * @srv_version: The service versions we can support.
 * @srv_vercnt: The size of @srv_version.
 * @nego_fw_version: The selected framework version.
 * @nego_srv_version: The selected service version.
 *
 * Note: Versions are given in decreasing order.
 *
 * Set up and fill in default negotiate response message.
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
                                u32 buflen, const int *fw_version, int fw_vercnt,
                                const int *srv_version, int srv_vercnt,
                                int *nego_fw_version, int *nego_srv_version)
{
        int icframe_major, icframe_minor;
        int icmsg_major, icmsg_minor;
        int fw_major, fw_minor;
        int srv_major, srv_minor;
        int i, j;
        bool found_match = false;
        struct icmsg_negotiate *negop;

        /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
        if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
                pr_err_ratelimited("Invalid icmsg negotiate\n");
                return false;
        }

        icmsghdrp->icmsgsize = 0x10;
        negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

        icframe_major = negop->icframe_vercnt;
        icframe_minor = 0;

        icmsg_major = negop->icmsg_vercnt;
        icmsg_minor = 0;

        /* Validate negop packet */
        if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
            icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
            ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
                pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
                                   icframe_major, icmsg_major);
                goto fw_error;
        }

        /*
         * Select the framework version number we will support.
         */
        for (i = 0; i < fw_vercnt; i++) {
                fw_major = (fw_version[i] >> 16);
                fw_minor = (fw_version[i] & 0xFFFF);

                for (j = 0; j < negop->icframe_vercnt; j++) {
                        if ((negop->icversion_data[j].major == fw_major) &&
                            (negop->icversion_data[j].minor == fw_minor)) {
                                icframe_major = negop->icversion_data[j].major;
                                icframe_minor = negop->icversion_data[j].minor;
                                found_match = true;
                                break;
                        }
                }

                if (found_match)
                        break;
        }

        if (!found_match)
                goto fw_error;

        found_match = false;

        for (i = 0; i < srv_vercnt; i++) {
                srv_major = (srv_version[i] >> 16);
                srv_minor = (srv_version[i] & 0xFFFF);

                for (j = negop->icframe_vercnt;
                        (j < negop->icframe_vercnt + negop->icmsg_vercnt);
                        j++) {

                        if ((negop->icversion_data[j].major == srv_major) &&
                                (negop->icversion_data[j].minor == srv_minor)) {

                                icmsg_major = negop->icversion_data[j].major;
                                icmsg_minor = negop->icversion_data[j].minor;
                                found_match = true;
                                break;
                        }
                }

                if (found_match)
                        break;
        }

        /*
         * Respond with the framework and service version numbers we can
         * support.
         */
fw_error:
        if (!found_match) {
                negop->icframe_vercnt = 0;
                negop->icmsg_vercnt = 0;
        } else {
                negop->icframe_vercnt = 1;
                negop->icmsg_vercnt = 1;
        }

        if (nego_fw_version)
                *nego_fw_version = (icframe_major << 16) | icframe_minor;

        if (nego_srv_version)
                *nego_srv_version = (icmsg_major << 16) | icmsg_minor;

        negop->icversion_data[0].major = icframe_major;
        negop->icversion_data[0].minor = icframe_minor;
        negop->icversion_data[1].major = icmsg_major;
        negop->icversion_data[1].minor = icmsg_minor;
        return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
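
/*
 * Typical callers are the hv_utils "IC" drivers, which invoke this from
 * their channel callbacks when a negotiate packet arrives. A minimal,
 * illustrative sketch (the version arrays below are hypothetical
 * placeholders; see drivers/hv/hv_util.c for the real callers):
 *
 *        const int fw_versions[] = { UTIL_FW_VERSION };
 *        const int srv_versions[] = { SRV_VERSION_NEW, SRV_VERSION_OLD };
 *        int nego_fw, nego_srv;
 *
 *        if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *                vmbus_prep_negotiate_resp(icmsghdrp, buf, buflen,
 *                                          fw_versions, ARRAY_SIZE(fw_versions),
 *                                          srv_versions, ARRAY_SIZE(srv_versions),
 *                                          &nego_fw, &nego_srv);
 */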

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
        struct vmbus_channel *channel;

        channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
        if (!channel)
                return NULL;

        spin_lock_init(&channel->sched_lock);
        init_completion(&channel->rescind_event);

        INIT_LIST_HEAD(&channel->sc_list);

        tasklet_init(&channel->callback_event,
                     vmbus_on_event, (unsigned long)channel);

        hv_ringbuffer_pre_init(channel);

        return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
        tasklet_kill(&channel->callback_event);
        vmbus_remove_channel_attr_group(channel);

        kobject_put(&channel->kobj);
}

void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
        if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
                return;
        /*
         * The mapping of the channel's relid is visible from the CPUs that
         * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
         * execute:
         *
         *  (a) In the "normal (i.e., not resuming from hibernation)" path,
         *      the full barrier in smp_store_mb() guarantees that the store
         *      is propagated to all CPUs before the add_channel_work work
         *      is queued.  In turn, add_channel_work is queued before the
         *      channel's ring buffer is allocated/initialized and the
         *      OPENCHANNEL message for the channel is sent in vmbus_open().
         *      Hyper-V won't start sending the interrupts for the channel
         *      before the OPENCHANNEL message is acked.  The memory barrier
         *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
         *      that vmbus_chan_sched() must find the channel's relid in
         *      recv_int_page before retrieving the channel pointer from the
         *      array of channels.
         *
         *  (b) In the "resuming from hibernation" path, the smp_store_mb()
         *      guarantees that the store is propagated to all CPUs before
         *      the VMBus connection is marked as ready for the resume event
         *      (cf. check_ready_for_resume_event()).  The interrupt handler
         *      of the VMBus driver and vmbus_chan_sched() can not run before
         *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
         */
        smp_store_mb(
                vmbus_connection.channels[channel->offermsg.child_relid],
                channel);
}

void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
        if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
                return;
        WRITE_ONCE(
                vmbus_connection.channels[channel->offermsg.child_relid],
                NULL);
}
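
/*
 * Once a channel is mapped, the interrupt path translates a relid back to
 * the channel by indexing the same array. A minimal sketch of the lookup
 * done on the scheduling side (cf. relid2channel() in connection.c):
 *
 *        struct vmbus_channel *chan = NULL;
 *
 *        if (relid < MAX_CHANNEL_RELIDS)
 *                chan = READ_ONCE(vmbus_connection.channels[relid]);
 */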

static void vmbus_release_relid(u32 relid)
{
        struct vmbus_channel_relid_released msg;
        int ret;

        memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
        msg.child_relid = relid;
        msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
        ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
                             true);

        trace_vmbus_release_relid(&msg, ret);
}

void hv_process_channel_removal(struct vmbus_channel *channel)
{
        lockdep_assert_held(&vmbus_connection.channel_mutex);
        BUG_ON(!channel->rescind);

        /*
         * hv_process_channel_removal() could find INVALID_RELID only for
         * hv_sock channels.  See the inline comments in vmbus_onoffer().
         */
        WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
                !is_hvsock_channel(channel));

        /*
         * Upon suspend, an in-use hv_sock channel is removed from the array of
         * channels and the relid is invalidated.  After hibernation, when the
         * user-space application destroys the channel, it's unnecessary and
         * unsafe to remove the channel from the array of channels.  See also
         * the inline comments before the call of vmbus_release_relid() below.
         */
        if (channel->offermsg.child_relid != INVALID_RELID)
                vmbus_channel_unmap_relid(channel);

        if (channel->primary_channel == NULL)
                list_del(&channel->listentry);
        else
                list_del(&channel->sc_list);

        /*
         * If this is a "perf" channel, update the hv_numa_map[] masks so that
         * init_vp_index() can (re-)use the CPU.
         */
        if (hv_is_perf_channel(channel))
                hv_clear_alloced_cpu(channel->target_cpu);

        /*
         * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
         * the relid is invalidated; after hibernation, when the user-space app
         * destroys the channel, the relid is INVALID_RELID, and in this case
         * it's unnecessary and unsafe to release the old relid, since the same
         * relid can now refer to a completely different channel.
         */
        if (channel->offermsg.child_relid != INVALID_RELID)
                vmbus_release_relid(channel->offermsg.child_relid);

        free_channel(channel);
}

void vmbus_free_channels(void)
{
        struct vmbus_channel *channel, *tmp;

        list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
                listentry) {
                /* hv_process_channel_removal() needs this */
                channel->rescind = true;

                vmbus_device_unregister(channel->device_obj);
        }
}

/* Note: the function can run concurrently for primary/sub channels. */
static void vmbus_add_channel_work(struct work_struct *work)
{
        struct vmbus_channel *newchannel =
                container_of(work, struct vmbus_channel, add_channel_work);
        struct vmbus_channel *primary_channel = newchannel->primary_channel;
        int ret;

        /*
         * This state is used to indicate a successful open
         * so that when we do close the channel normally, we
         * can clean up properly.
         */
        newchannel->state = CHANNEL_OPEN_STATE;

        if (primary_channel != NULL) {
                /* newchannel is a sub-channel. */
                struct hv_device *dev = primary_channel->device_obj;

                if (vmbus_add_channel_kobj(dev, newchannel))
                        goto err_deq_chan;

                if (primary_channel->sc_creation_callback != NULL)
                        primary_channel->sc_creation_callback(newchannel);

                newchannel->probe_done = true;
                return;
        }

        /*
         * Start the process of binding the primary channel to the driver.
         */
        newchannel->device_obj = vmbus_device_create(
                &newchannel->offermsg.offer.if_type,
                &newchannel->offermsg.offer.if_instance,
                newchannel);
        if (!newchannel->device_obj)
                goto err_deq_chan;

        newchannel->device_obj->device_id = newchannel->device_id;
        /*
         * Add the new device to the bus. This will kick off device-driver
         * binding which eventually invokes the device driver's AddDevice()
         * method.
         */
        ret = vmbus_device_register(newchannel->device_obj);

        if (ret != 0) {
                pr_err("unable to add child device object (relid %d)\n",
                        newchannel->offermsg.child_relid);
                kfree(newchannel->device_obj);
                goto err_deq_chan;
        }

        newchannel->probe_done = true;
        return;

err_deq_chan:
        mutex_lock(&vmbus_connection.channel_mutex);

        /*
         * We need to set the flag, otherwise
         * vmbus_onoffer_rescind() can be blocked.
         */
        newchannel->probe_done = true;

        if (primary_channel == NULL)
                list_del(&newchannel->listentry);
        else
                list_del(&newchannel->sc_list);

        /* vmbus_process_offer() has mapped the channel. */
        vmbus_channel_unmap_relid(newchannel);

        mutex_unlock(&vmbus_connection.channel_mutex);

        vmbus_release_relid(newchannel->offermsg.child_relid);

        free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
        struct vmbus_channel *channel;
        struct workqueue_struct *wq;
        bool fnew = true;

        /*
         * Synchronize vmbus_process_offer() and CPU hotplugging:
         *
         * CPU1                         CPU2
         *
         * [vmbus_process_offer()]      [Hot removal of the CPU]
         *
         * CPU_READ_LOCK                CPUS_WRITE_LOCK
         * LOAD cpu_online_mask         SEARCH chn_list
         * STORE target_cpu             LOAD target_cpu
         * INSERT chn_list              STORE cpu_online_mask
         * CPUS_READ_UNLOCK             CPUS_WRITE_UNLOCK
         *
         * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
         *              CPU2's SEARCH from *not* seeing CPU1's INSERT
         *
         * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
         *              CPU2's LOAD from *not* seeing CPU1's STORE
         */
        cpus_read_lock();

        /*
         * Serializes the modifications of the chn_list list as well as
         * the accesses to next_numa_node_id in init_vp_index().
         */
        mutex_lock(&vmbus_connection.channel_mutex);

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (guid_equal(&channel->offermsg.offer.if_type,
                               &newchannel->offermsg.offer.if_type) &&
                    guid_equal(&channel->offermsg.offer.if_instance,
                               &newchannel->offermsg.offer.if_instance)) {
                        fnew = false;
                        newchannel->primary_channel = channel;
                        break;
                }
        }

        init_vp_index(newchannel);

        /* Remember the channels that should be cleaned up upon suspend. */
        if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
                atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

        /*
         * Now that we have acquired the channel_mutex,
         * we can release the potentially racing rescind thread.
         */
        atomic_dec(&vmbus_connection.offer_in_progress);

        if (fnew) {
                list_add_tail(&newchannel->listentry,
                              &vmbus_connection.chn_list);
        } else {
                /*
                 * Check to see if this is a valid sub-channel.
                 */
                if (newchannel->offermsg.offer.sub_channel_index == 0) {
                        mutex_unlock(&vmbus_connection.channel_mutex);
                        /*
                         * Don't call free_channel(), because newchannel->kobj
                         * is not initialized yet.
                         */
                        kfree(newchannel);
                        WARN_ON_ONCE(1);
                        return;
                }
                /*
                 * Process the sub-channel.
                 */
                list_add_tail(&newchannel->sc_list, &channel->sc_list);
        }

        vmbus_channel_map_relid(newchannel);

        mutex_unlock(&vmbus_connection.channel_mutex);
        cpus_read_unlock();

        /*
         * vmbus_process_offer() mustn't call channel->sc_creation_callback()
         * directly for sub-channels, because sc_creation_callback() ->
         * vmbus_open() may never get the host's response to the
         * OPEN_CHANNEL message (the host may rescind a channel at any time,
         * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
         * may not wake up vmbus_open() as it's blocked due to a non-zero
         * vmbus_connection.offer_in_progress, and finally we have a deadlock.
         *
         * The above is also true for primary channels, if the related device
         * drivers use sync probing mode by default.
         *
         * And, usually the handling of primary channels and sub-channels can
         * depend on each other, so we should offload them to different
         * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
         * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
         * rtnl_lock(), causing a deadlock: the former gets the rtnl_lock
         * and waits for all the sub-channels to appear, but the latter
         * can't get the rtnl_lock, and this blocks the handling of
         * sub-channels.
         */
        INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
        wq = fnew ? vmbus_connection.handle_primary_chan_wq :
                    vmbus_connection.handle_sub_chan_wq;
        queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check if the given CPU is used by other channels of the same device.
 * It should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
        struct vmbus_channel *primary = chn->primary_channel;
        struct vmbus_channel *sc;

        lockdep_assert_held(&vmbus_connection.channel_mutex);

        if (!primary)
                return false;

        if (primary->target_cpu == cpu)
                return true;

        list_for_each_entry(sc, &primary->sc_list, sc_list)
                if (sc != chn && sc->target_cpu == cpu)
                        return true;

        return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * VMBUS_CONNECT_CPU.
 *
 * Starting with win8, performance critical channels will be distributed
 * evenly among all the available NUMA nodes.  Once the node is assigned,
 * we will assign the CPU based on a simple round robin scheme.
 */
static void init_vp_index(struct vmbus_channel *channel)
{
        bool perf_chn = hv_is_perf_channel(channel);
        u32 i, ncpu = num_online_cpus();
        cpumask_var_t available_mask;
        struct cpumask *alloced_mask;
        u32 target_cpu;
        int numa_node;

        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
            !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
                /*
                 * Prior to win8, all channel interrupts are
                 * delivered on VMBUS_CONNECT_CPU.
                 * Also if the channel is not a performance critical
                 * channel, bind it to VMBUS_CONNECT_CPU.
                 * In case alloc_cpumask_var() fails, bind it to
                 * VMBUS_CONNECT_CPU.
                 */
                channel->target_cpu = VMBUS_CONNECT_CPU;
                if (perf_chn)
                        hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
                return;
        }

        for (i = 1; i <= ncpu + 1; i++) {
                while (true) {
                        numa_node = next_numa_node_id++;
                        if (numa_node == nr_node_ids) {
                                next_numa_node_id = 0;
                                continue;
                        }
                        if (cpumask_empty(cpumask_of_node(numa_node)))
                                continue;
                        break;
                }
                alloced_mask = &hv_context.hv_numa_map[numa_node];

                if (cpumask_weight(alloced_mask) ==
                    cpumask_weight(cpumask_of_node(numa_node))) {
                        /*
                         * We have cycled through all the CPUs in the node;
                         * reset the alloced map.
                         */
                        cpumask_clear(alloced_mask);
                }

                cpumask_xor(available_mask, alloced_mask,
                            cpumask_of_node(numa_node));

                target_cpu = cpumask_first(available_mask);
                cpumask_set_cpu(target_cpu, alloced_mask);

                if (channel->offermsg.offer.sub_channel_index >= ncpu ||
                    i > ncpu || !hv_cpuself_used(target_cpu, channel))
                        break;
        }

        channel->target_cpu = target_cpu;

        free_cpumask_var(available_mask);
}
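
/*
 * Worked example of the round-robin policy above, assuming two online NUMA
 * nodes with CPUs {0-3} and {4-7} and initially empty hv_numa_map[] masks:
 * the first four perf channels are assigned CPUs 0, 4, 1, 5 (nodes are
 * visited alternately, and within a node the first not-yet-alloced CPU is
 * picked). Once every CPU in a node has been handed out, that node's
 * alloced mask is cleared and its CPUs are reused.
 */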

#define UNLOAD_DELAY_UNIT_MS    10              /* 10 milliseconds */
#define UNLOAD_WAIT_MS          (100*1000)      /* 100 seconds */
#define UNLOAD_WAIT_LOOPS       (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
#define UNLOAD_MSG_MS           (5*1000)        /* Every 5 seconds */
#define UNLOAD_MSG_LOOPS        (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
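
/*
 * With the values above, the loop in vmbus_wait_for_unload() below runs for
 * up to UNLOAD_WAIT_LOOPS = 100000 / 10 = 10000 iterations of 10 ms each
 * (100 seconds total), and prints a progress notice every
 * UNLOAD_MSG_LOOPS = 5000 / 10 = 500 iterations (every 5 seconds).
 */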

static void vmbus_wait_for_unload(void)
{
        int cpu;
        void *page_addr;
        struct hv_message *msg;
        struct vmbus_channel_message_header *hdr;
        u32 message_type, i;

        /*
         * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
         * used for initial contact or to CPU0 depending on host version. When
         * we're crashing on a different CPU let's hope that the IRQ handler on
         * the CPU which receives CHANNELMSG_UNLOAD_RESPONSE is still
         * functional and vmbus_unload_response() will complete
         * vmbus_connection.unload_event. If not, the last thing we can do is
         * read message pages for all CPUs directly.
         *
         * Wait up to 100 seconds since an Azure host must write back any dirty
         * data in its disk cache before the VMbus UNLOAD request will
         * complete. This flushing has been empirically observed to take up
         * to 50 seconds in cases with a lot of dirty data, so allow additional
         * leeway and for inaccuracies in mdelay(). But eventually time out so
         * that the panic path can't get hung forever in case the response
         * message isn't seen.
         */
        for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
                if (completion_done(&vmbus_connection.unload_event))
                        goto completed;

                for_each_online_cpu(cpu) {
                        struct hv_per_cpu_context *hv_cpu
                                = per_cpu_ptr(hv_context.cpu_context, cpu);

                        page_addr = hv_cpu->synic_message_page;
                        msg = (struct hv_message *)page_addr
                                + VMBUS_MESSAGE_SINT;

                        message_type = READ_ONCE(msg->header.message_type);
                        if (message_type == HVMSG_NONE)
                                continue;

                        hdr = (struct vmbus_channel_message_header *)
                                msg->u.payload;

                        if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
                                complete(&vmbus_connection.unload_event);

                        vmbus_signal_eom(msg, message_type);
                }

                /*
                 * Print a notice periodically so someone watching the
                 * serial output won't think it is completely hung.
                 */
                if (!(i % UNLOAD_MSG_LOOPS))
                        pr_notice("Waiting for VMBus UNLOAD to complete\n");

                mdelay(UNLOAD_DELAY_UNIT_MS);
        }
        pr_err("Continuing even though VMBus UNLOAD did not complete\n");

completed:
        /*
         * We're crashing and already got the UNLOAD_RESPONSE; clean up any
         * pending messages on all CPUs so that new messages can be received
         * after we reconnect.
         */
        for_each_online_cpu(cpu) {
                struct hv_per_cpu_context *hv_cpu
                        = per_cpu_ptr(hv_context.cpu_context, cpu);

                page_addr = hv_cpu->synic_message_page;
                msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
                msg->header.message_type = HVMSG_NONE;
        }
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
        /*
         * This is a global event; just wake up the waiting thread.
         * Once we successfully unload, we can clean up the monitor state.
         *
         * NB.  A malicious or compromised Hyper-V could send a spurious
         * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
         * of the complete() below.  Make sure that unload_event has been
         * initialized by the time this complete() is executed.
         */
        complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
        struct vmbus_channel_message_header hdr;

        if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
                return;

        /* Pre-Win2012R2 hosts don't support reconnect */
        if (vmbus_proto_version < VERSION_WIN8_1)
                return;

        reinit_completion(&vmbus_connection.unload_event);
        memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
        hdr.msgtype = CHANNELMSG_UNLOAD;
        vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
                       !crash);

        /*
         * vmbus_initiate_unload() is also called on crash, and the crash may
         * happen in interrupt context, where scheduling is impossible.
         */
        if (!crash)
                wait_for_completion(&vmbus_connection.unload_event);
        else
                vmbus_wait_for_unload();
}

static void check_ready_for_resume_event(void)
{
        /*
         * If all the old primary channels have been fixed up, then it's safe
         * to resume.
         */
        if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
                complete(&vmbus_connection.ready_for_resume_event);
}

static void vmbus_setup_channel_state(struct vmbus_channel *channel,
                                      struct vmbus_channel_offer_channel *offer)
{
        /*
         * Set up state for signalling the host.
         */
        channel->sig_event = VMBUS_EVENT_CONNECTION_ID;

        if (vmbus_proto_version != VERSION_WS2008) {
                channel->is_dedicated_interrupt =
                                (offer->is_dedicated_interrupt != 0);
                channel->sig_event = offer->connection_id;
        }

        memcpy(&channel->offermsg, offer,
               sizeof(struct vmbus_channel_offer_channel));
        channel->monitor_grp = (u8)offer->monitorid / 32;
        channel->monitor_bit = (u8)offer->monitorid % 32;
        channel->device_id = hv_get_dev_type(channel);
}

/*
 * find_primary_channel_by_offer - Get the channel object given the new offer.
 * This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
        struct vmbus_channel *channel = NULL, *iter;
        const guid_t *inst1, *inst2;

        /* Ignore sub-channel offers. */
        if (offer->offer.sub_channel_index != 0)
                return NULL;

        mutex_lock(&vmbus_connection.channel_mutex);

        list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
                inst1 = &iter->offermsg.offer.if_instance;
                inst2 = &offer->offer.if_instance;

                if (guid_equal(inst1, inst2)) {
                        channel = iter;
                        break;
                }
        }

        mutex_unlock(&vmbus_connection.channel_mutex);

        return channel;
}

static bool vmbus_is_valid_device(const guid_t *guid)
{
        u16 i;

        if (!hv_is_isolation_supported())
                return true;

        for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
                if (guid_equal(guid, &vmbus_devs[i].guid))
                        return vmbus_devs[i].allowed_in_isolated;
        }
        return false;
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_offer_channel *offer;
        struct vmbus_channel *oldchannel, *newchannel;
        size_t offer_sz;

        offer = (struct vmbus_channel_offer_channel *)hdr;

        trace_vmbus_onoffer(offer);

        if (!vmbus_is_valid_device(&offer->offer.if_type)) {
                pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
                                   offer->child_relid);
                atomic_dec(&vmbus_connection.offer_in_progress);
                return;
        }

        oldchannel = find_primary_channel_by_offer(offer);

        if (oldchannel != NULL) {
                /*
                 * We're resuming from hibernation: all the sub-channel and
                 * hv_sock channels we had before the hibernation should have
                 * been cleaned up, and now we must be seeing a re-offered
                 * primary channel that we had before the hibernation.
                 */

                /*
                 * { Initially: channel relid = INVALID_RELID,
                 *              channels[valid_relid] = NULL }
                 *
                 * CPU1                                 CPU2
                 *
                 * [vmbus_onoffer()]                    [vmbus_device_release()]
                 *
                 * LOCK channel_mutex                   LOCK channel_mutex
                 * STORE channel relid = valid_relid    LOAD r1 = channel relid
                 * MAP_RELID channel                    if (r1 != INVALID_RELID)
                 * UNLOCK channel_mutex                   UNMAP_RELID channel
                 *                                      UNLOCK channel_mutex
                 *
                 * Forbids: r1 == valid_relid &&
                 *              channels[valid_relid] == channel
                 *
                 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
                 * None of the hv_sock channels which were present before the
                 * suspend are re-offered upon the resume.  See the WARN_ON()
                 * in hv_process_channel_removal().
                 */
                mutex_lock(&vmbus_connection.channel_mutex);

                atomic_dec(&vmbus_connection.offer_in_progress);

                WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
                /* Fix up the relid. */
                oldchannel->offermsg.child_relid = offer->child_relid;

                offer_sz = sizeof(*offer);
                if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
                        /*
                         * This is not an error, since the host can also change
                         * the other field(s) of the offer, e.g. on WS RS5
                         * (Build 17763), the offer->connection_id of the
                         * Mellanox VF vmbus device can change when the host
                         * reoffers the device upon resume.
                         */
                        pr_debug("vmbus offer changed: relid=%d\n",
                                 offer->child_relid);

                        print_hex_dump_debug("Old vmbus offer: ",
                                             DUMP_PREFIX_OFFSET, 16, 4,
                                             &oldchannel->offermsg, offer_sz,
                                             false);
                        print_hex_dump_debug("New vmbus offer: ",
                                             DUMP_PREFIX_OFFSET, 16, 4,
                                             offer, offer_sz, false);

                        /* Fix up the old channel. */
                        vmbus_setup_channel_state(oldchannel, offer);
                }

                /* Add the channel back to the array of channels. */
                vmbus_channel_map_relid(oldchannel);
                check_ready_for_resume_event();

                mutex_unlock(&vmbus_connection.channel_mutex);
                return;
        }

        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
                vmbus_release_relid(offer->child_relid);
                atomic_dec(&vmbus_connection.offer_in_progress);
                pr_err("Unable to allocate channel object\n");
                return;
        }

        vmbus_setup_channel_state(newchannel, offer);

        vmbus_process_offer(newchannel);
}

static void check_ready_for_suspend_event(void)
{
        /*
         * If all the sub-channels or hv_sock channels have been cleaned up,
         * then it's safe to suspend.
         */
        if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
                complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * The rescind is processed synchronously here, after any in-progress
 * handling of the corresponding offer has completed.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        struct device *dev;
        bool clean_up_chan_for_suspend;

        rescind = (struct vmbus_channel_rescind_offer *)hdr;

        trace_vmbus_onoffer_rescind(rescind);

        /*
         * The offer msg and the corresponding rescind msg
         * from the host are guaranteed to be ordered -
         * offer comes in first and then the rescind.
         * Since we process these events in work elements,
         * and with preemption, we may end up processing
         * the events out of order.  We rely on the synchronization
         * provided by offer_in_progress and by channel_mutex for
         * ordering these events:
         *
         * { Initially: offer_in_progress = 1 }
         *
         * CPU1                         CPU2
         *
         * [vmbus_onoffer()]            [vmbus_onoffer_rescind()]
         *
         * LOCK channel_mutex           WAIT_ON offer_in_progress == 0
         * DECREMENT offer_in_progress  LOCK channel_mutex
         * STORE channels[]             LOAD channels[]
         * UNLOCK channel_mutex         UNLOCK channel_mutex
         *
         * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
         */

        while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
                /*
                 * Wait until no channel offer is being processed.
                 */
                msleep(1);
        }

        mutex_lock(&vmbus_connection.channel_mutex);
        channel = relid2channel(rescind->child_relid);
        if (channel != NULL) {
                /*
                 * Guarantee that no other instance of vmbus_onoffer_rescind()
                 * has got a reference to the channel object.  Synchronize on
                 * &vmbus_connection.channel_mutex.
                 */
                if (channel->rescind_ref) {
                        mutex_unlock(&vmbus_connection.channel_mutex);
                        return;
                }
                channel->rescind_ref = true;
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        if (channel == NULL) {
                /*
                 * We failed in processing the offer message;
                 * we would have cleaned up the relid in that
                 * failure path.
                 */
                return;
        }

        clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
                                    is_sub_channel(channel);
        /*
         * Before setting channel->rescind in vmbus_rescind_cleanup(), we
         * should make sure the channel callback is not running any more.
         */
        vmbus_reset_channel_cb(channel);

        /*
         * Now wait for offer handling to complete.
         */
        vmbus_rescind_cleanup(channel);
        while (READ_ONCE(channel->probe_done) == false) {
                /*
                 * Wait until the probing of the channel (see
                 * vmbus_add_channel_work()) has completed.
                 */
                msleep(1);
        }

        /*
         * At this point, the rescind handling can proceed safely.
         */

        if (channel->device_obj) {
                if (channel->chn_rescind_callback) {
                        channel->chn_rescind_callback(channel);

                        if (clean_up_chan_for_suspend)
                                check_ready_for_suspend_event();

                        return;
                }
                /*
                 * We will have to unregister this device from the
                 * driver core.
                 */
                dev = get_device(&channel->device_obj->device);
                if (dev) {
                        vmbus_device_unregister(channel->device_obj);
                        put_device(dev);
                }
        } else if (channel->primary_channel != NULL) {
                /*
                 * Sub-channel is being rescinded. Following is the channel
                 * close sequence when initiated from the driver (refer to
                 * vmbus_close() for details):
                 * 1. Close all sub-channels first
                 * 2. Then close the primary channel.
                 */
                mutex_lock(&vmbus_connection.channel_mutex);
                if (channel->state == CHANNEL_OPEN_STATE) {
                        /*
                         * CHANNEL_OPEN_STATE means the channel has never been
                         * opened by a driver (vmbus_open() would have moved it
                         * to CHANNEL_OPENED_STATE), so it is safe to clean up
                         * the channel here.
                         */
                        hv_process_channel_removal(channel);
                } else {
                        complete(&channel->rescind_event);
                }
                mutex_unlock(&vmbus_connection.channel_mutex);
        }

        /* The "channel" may have been freed. Do not access it any longer. */

        if (clean_up_chan_for_suspend)
                check_ready_for_suspend_event();
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
        BUG_ON(!is_hvsock_channel(channel));

        /* We always get a rescind msg when a connection is closed. */
        while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
                msleep(1);

        vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
                        struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_open_result *result;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_open_channel *openmsg;
        unsigned long flags;

        result = (struct vmbus_channel_open_result *)hdr;

        trace_vmbus_onopen_result(result);

        /*
         * Find the open msg, copy the result and signal/unblock the wait event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
                        openmsg =
                        (struct vmbus_channel_open_channel *)msginfo->msg;
                        if (openmsg->child_relid == result->child_relid &&
                            openmsg->openid == result->openid) {
                                memcpy(&msginfo->response.open_result,
                                       result,
                                       sizeof(struct vmbus_channel_open_result));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_created *gpadlcreated;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_header *gpadlheader;
        unsigned long flags;

        gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

        trace_vmbus_ongpadl_created(gpadlcreated);

        /*
         * Find the establish msg, copy the result and signal/unblock the wait
         * event
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
                        gpadlheader =
                        (struct vmbus_channel_gpadl_header *)requestheader;

                        if ((gpadlcreated->child_relid ==
                             gpadlheader->child_relid) &&
                            (gpadlcreated->gpadl == gpadlheader->gpadl)) {
                                memcpy(&msginfo->response.gpadl_created,
                                       gpadlcreated,
                                       sizeof(struct vmbus_channel_gpadl_created));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onmodifychannel_response - Modify Channel response handler.
 *
 * This is invoked when we receive a response to our channel modify request.
 * Find the matching request, copy the response and signal the requesting thread.
 */
static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_modifychannel_response *response;
        struct vmbus_channel_msginfo *msginfo;
        unsigned long flags;

        response = (struct vmbus_channel_modifychannel_response *)hdr;

        trace_vmbus_onmodifychannel_response(response);

        /*
         * Find the modify msg, copy the response and signal/unblock the wait event.
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
                struct vmbus_channel_message_header *requestheader =
                                (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
                        struct vmbus_channel_modifychannel *modifymsg;

                        modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
                        if (modifymsg->child_relid == response->child_relid) {
                                memcpy(&msginfo->response.modify_response, response,
                                       sizeof(*response));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
                        struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_torndown *gpadl_torndown;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_teardown *gpadl_teardown;
        unsigned long flags;

        gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

        trace_vmbus_ongpadl_torndown(gpadl_torndown);

        /*
         * Find the teardown msg, copy the result and signal/unblock the wait
         * event.
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
                        gpadl_teardown =
                        (struct vmbus_channel_gpadl_teardown *)requestheader;

                        if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
                                memcpy(&msginfo->response.gpadl_torndown,
                                       gpadl_torndown,
                                       sizeof(struct vmbus_channel_gpadl_torndown));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}
1457
/*
 * vmbus_onversion_response - Version response handler.
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
                struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_version_response *version_response;
        unsigned long flags;

        version_response = (struct vmbus_channel_version_response *)hdr;

        trace_vmbus_onversion_response(version_response);

        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_INITIATE_CONTACT) {
                        memcpy(&msginfo->response.version_response,
                               version_response,
                               sizeof(*version_response));
                        complete(&msginfo->waitevent);
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * Channel message dispatch table.
 *
 * vmbus_onmessage() indexes this table directly by hdr->msgtype, so entries
 * must stay in CHANNELMSG_* enum order.  Each entry gives the message type,
 * a flag telling vmbus_on_msg_dpc() whether the handler may sleep (and must
 * therefore run from a work queue rather than directly in the message DPC),
 * the handler itself (NULL for message types the guest only sends or simply
 * ignores), and the minimum payload length validated before dispatch.
 */
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
        { CHANNELMSG_INVALID,                   0, NULL, 0},
        { CHANNELMSG_OFFERCHANNEL,              0, vmbus_onoffer,
                sizeof(struct vmbus_channel_offer_channel)},
        { CHANNELMSG_RESCIND_CHANNELOFFER,      0, vmbus_onoffer_rescind,
                sizeof(struct vmbus_channel_rescind_offer)},
        { CHANNELMSG_REQUESTOFFERS,             0, NULL, 0},
        { CHANNELMSG_ALLOFFERS_DELIVERED,       1, vmbus_onoffers_delivered, 0},
        { CHANNELMSG_OPENCHANNEL,               0, NULL, 0},
        { CHANNELMSG_OPENCHANNEL_RESULT,        1, vmbus_onopen_result,
                sizeof(struct vmbus_channel_open_result)},
        { CHANNELMSG_CLOSECHANNEL,              0, NULL, 0},
        { CHANNELMSG_GPADL_HEADER,              0, NULL, 0},
        { CHANNELMSG_GPADL_BODY,                0, NULL, 0},
        { CHANNELMSG_GPADL_CREATED,             1, vmbus_ongpadl_created,
                sizeof(struct vmbus_channel_gpadl_created)},
        { CHANNELMSG_GPADL_TEARDOWN,            0, NULL, 0},
        { CHANNELMSG_GPADL_TORNDOWN,            1, vmbus_ongpadl_torndown,
                sizeof(struct vmbus_channel_gpadl_torndown)},
        { CHANNELMSG_RELID_RELEASED,            0, NULL, 0},
        { CHANNELMSG_INITIATE_CONTACT,          0, NULL, 0},
        { CHANNELMSG_VERSION_RESPONSE,          1, vmbus_onversion_response,
                sizeof(struct vmbus_channel_version_response)},
        { CHANNELMSG_UNLOAD,                    0, NULL, 0},
        { CHANNELMSG_UNLOAD_RESPONSE,           1, vmbus_unload_response, 0},
        { CHANNELMSG_18,                        0, NULL, 0},
        { CHANNELMSG_19,                        0, NULL, 0},
        { CHANNELMSG_20,                        0, NULL, 0},
        { CHANNELMSG_TL_CONNECT_REQUEST,        0, NULL, 0},
        { CHANNELMSG_MODIFYCHANNEL,             0, NULL, 0},
        { CHANNELMSG_TL_CONNECT_RESULT,         0, NULL, 0},
        { CHANNELMSG_MODIFYCHANNEL_RESPONSE,    1, vmbus_onmodifychannel_response,
                sizeof(struct vmbus_channel_modifychannel_response)},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
        trace_vmbus_on_message(hdr);

        /*
         * vmbus_on_msg_dpc() makes sure that hdr->msgtype cannot go out of
         * bounds and that the message_handler pointer cannot be NULL.
         */
        channel_message_table[hdr->msgtype].message_handler(hdr);
}
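
/*
 * For reference, the checks that make the direct table dispatch above safe
 * are performed in vmbus_on_msg_dpc() before this function is ever reached;
 * roughly (a sketch, not the verbatim code):
 *
 *	if (hdr->msgtype >= CHANNELMSG_COUNT)
 *		return;				// unknown message type
 *	entry = &channel_message_table[hdr->msgtype];
 *	if (payload_size < entry->min_payload_len ||
 *	    entry->message_handler == NULL)
 *		return;				// malformed or unhandled
 */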

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 *
 * The offers themselves arrive asynchronously as CHANNELMSG_OFFERCHANNEL
 * messages (dispatched to vmbus_onoffer() via the table above), followed by
 * CHANNELMSG_ALLOFFERS_DELIVERED once all pending offers have been sent.
 */
int vmbus_request_offers(void)
{
        struct vmbus_channel_message_header *msg;
        struct vmbus_channel_msginfo *msginfo;
        int ret;

        msginfo = kmalloc(sizeof(*msginfo) +
                          sizeof(struct vmbus_channel_message_header),
                          GFP_KERNEL);
        if (!msginfo)
                return -ENOMEM;

        msg = (struct vmbus_channel_message_header *)msginfo->msg;

        msg->msgtype = CHANNELMSG_REQUESTOFFERS;

        ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
                             true);

        trace_vmbus_request_offers(ret);

        if (ret != 0)
                pr_err("Unable to request offers - %d\n", ret);

        kfree(msginfo);

        return ret;
}

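/*
 * invoke_sc_cb - Invoke the driver's sub-channel creation callback for each
 * sub-channel currently on the primary channel's sc_list.
 */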
static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
        struct list_head *cur, *tmp;
        struct vmbus_channel *cur_channel;

        if (primary_channel->sc_creation_callback == NULL)
                return;

        list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

                primary_channel->sc_creation_callback(cur_channel);
        }
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
                                void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
        primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
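
/*
 * Typical usage (hypothetical driver code, for illustration only): a driver
 * that wants to set up each sub-channel as it is offered registers a
 * callback on its primary channel:
 *
 *	static void my_sc_created(struct vmbus_channel *new_sc)
 *	{
 *		// open new_sc and set up its ring buffer here
 *	}
 *
 *	vmbus_set_sc_create_callback(primary, my_sc_created);
 *
 * The callback is also replayed for sub-channels that already exist; see
 * vmbus_are_subchannels_present() below.
 */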

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
        bool ret;

        ret = !list_empty(&primary->sc_list);

        if (ret) {
                /*
                 * Invoke the callback on sub-channel creation.
                 * This will present a uniform interface to the
                 * clients.
                 */
                invoke_sc_cb(primary);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

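/*
 * vmbus_set_chn_rescind_callback - Register a callback to be invoked when
 * the host rescinds the offer backing @channel.
 */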
void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
                void (*chn_rescind_cb)(struct vmbus_channel *))
{
        channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
