linux/drivers/hv/channel_mgmt.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
        /* IDE */
        { .dev_type = HV_IDE,
          HV_IDE_GUID,
          .perf_device = true,
        },

        /* SCSI */
        { .dev_type = HV_SCSI,
          HV_SCSI_GUID,
          .perf_device = true,
        },

        /* Fibre Channel */
        { .dev_type = HV_FC,
          HV_SYNTHFC_GUID,
          .perf_device = true,
        },

        /* Synthetic NIC */
        { .dev_type = HV_NIC,
          HV_NIC_GUID,
          .perf_device = true,
        },

        /* Network Direct */
        { .dev_type = HV_ND,
          HV_ND_GUID,
          .perf_device = true,
        },

        /* PCIE */
        { .dev_type = HV_PCIE,
          HV_PCIE_GUID,
          .perf_device = true,
        },

        /* Synthetic Frame Buffer */
        { .dev_type = HV_FB,
          HV_SYNTHVID_GUID,
          .perf_device = false,
        },

        /* Synthetic Keyboard */
        { .dev_type = HV_KBD,
          HV_KBD_GUID,
          .perf_device = false,
        },

        /* Synthetic Mouse */
        { .dev_type = HV_MOUSE,
          HV_MOUSE_GUID,
          .perf_device = false,
        },

        /* KVP */
        { .dev_type = HV_KVP,
          HV_KVP_GUID,
          .perf_device = false,
        },

        /* Time Synch */
        { .dev_type = HV_TS,
          HV_TS_GUID,
          .perf_device = false,
        },

        /* Heartbeat */
        { .dev_type = HV_HB,
          HV_HEART_BEAT_GUID,
          .perf_device = false,
        },

        /* Shutdown */
        { .dev_type = HV_SHUTDOWN,
          HV_SHUTDOWN_GUID,
          .perf_device = false,
        },

        /* File copy */
        { .dev_type = HV_FCOPY,
          HV_FCOPY_GUID,
          .perf_device = false,
        },

        /* Backup */
        { .dev_type = HV_BACKUP,
          HV_VSS_GUID,
          .perf_device = false,
        },

        /* Dynamic Memory */
        { .dev_type = HV_DM,
          HV_DM_GUID,
          .perf_device = false,
        },

        /* Unknown GUID */
        { .dev_type = HV_UNKOWN,
          .perf_device = false,
        },
};

static const struct {
        uuid_le guid;
} vmbus_unsupported_devs[] = {
        { HV_AVMA1_GUID },
        { HV_AVMA2_GUID },
        { HV_RDV_GUID   },
};

static bool is_unsupported_vmbus_devs(const uuid_le *guid)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
                if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
                        return true;
        return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
        const uuid_le *guid = &channel->offermsg.offer.if_type;
        u16 i;

        if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
                return HV_UNKOWN;

        for (i = HV_IDE; i < HV_UNKOWN; i++) {
                if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
                        return i;
        }
        pr_info("Unknown GUID: %pUl\n", guid);
        return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
                                struct icmsg_negotiate *negop, u8 *buf,
                                int fw_version, int srv_version)
{
        int icframe_major, icframe_minor;
        int icmsg_major, icmsg_minor;
        int fw_major, fw_minor;
        int srv_major, srv_minor;
        int i;
        bool found_match = false;

        icmsghdrp->icmsgsize = 0x10;
        fw_major = (fw_version >> 16);
        fw_minor = (fw_version & 0xFFFF);

        srv_major = (srv_version >> 16);
        srv_minor = (srv_version & 0xFFFF);

        negop = (struct icmsg_negotiate *)&buf[
                sizeof(struct vmbuspipe_hdr) +
                sizeof(struct icmsg_hdr)];

        icframe_major = negop->icframe_vercnt;
        icframe_minor = 0;

        icmsg_major = negop->icmsg_vercnt;
        icmsg_minor = 0;

        /*
         * Select the framework version number we will
         * support.
         */

        for (i = 0; i < negop->icframe_vercnt; i++) {
                if ((negop->icversion_data[i].major == fw_major) &&
                   (negop->icversion_data[i].minor == fw_minor)) {
                        icframe_major = negop->icversion_data[i].major;
                        icframe_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        if (!found_match)
                goto fw_error;

        found_match = false;

        for (i = negop->icframe_vercnt;
                 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
                if ((negop->icversion_data[i].major == srv_major) &&
                   (negop->icversion_data[i].minor == srv_minor)) {
                        icmsg_major = negop->icversion_data[i].major;
                        icmsg_minor = negop->icversion_data[i].minor;
                        found_match = true;
                }
        }

        /*
         * Respond with the framework and service
         * version numbers we can support.
         */

fw_error:
        if (!found_match) {
                negop->icframe_vercnt = 0;
                negop->icmsg_vercnt = 0;
        } else {
                negop->icframe_vercnt = 1;
                negop->icmsg_vercnt = 1;
        }

        negop->icversion_data[0].major = icframe_major;
        negop->icversion_data[0].minor = icframe_minor;
        negop->icversion_data[1].major = icmsg_major;
        negop->icversion_data[1].minor = icmsg_minor;
        return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
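
/*
 * Illustrative sketch (not part of this file): a utility driver's channel
 * callback typically calls vmbus_prep_negotiate_resp() when it receives an
 * ICMSGTYPE_NEGOTIATE request and then sends the prepared response back.
 * The buffer name "util_buf" and the two version macros below are
 * placeholders; the overall pattern mirrors the hv_util.c services:
 *
 *      struct icmsg_hdr *icmsghdrp;
 *      u32 recvlen;
 *      u64 requestid;
 *
 *      vmbus_recvpacket(channel, util_buf, PAGE_SIZE, &recvlen, &requestid);
 *      if (recvlen > 0) {
 *              icmsghdrp = (struct icmsg_hdr *)&util_buf[
 *                              sizeof(struct vmbuspipe_hdr)];
 *
 *              if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *                      vmbus_prep_negotiate_resp(icmsghdrp, NULL, util_buf,
 *                                                MY_FW_VERSION,
 *                                                MY_SRV_VERSION);
 *
 *              icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
 *                      | ICMSGHDRFLAG_RESPONSE;
 *              vmbus_sendpacket(channel, util_buf, recvlen, requestid,
 *                               VM_PKT_DATA_INBAND, 0);
 *      }
 *
 * Passing NULL for @negop is fine here since the function recomputes the
 * negotiate-message pointer from @buf before using it.
 */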

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
        struct vmbus_channel *channel;

        channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
        if (!channel)
                return NULL;

        channel->acquire_ring_lock = true;
        spin_lock_init(&channel->inbound_lock);
        spin_lock_init(&channel->lock);

        INIT_LIST_HEAD(&channel->sc_list);
        INIT_LIST_HEAD(&channel->percpu_list);

        return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
        kfree(channel);
}

static void percpu_channel_enq(void *arg)
{
        struct vmbus_channel *channel = arg;
        int cpu = smp_processor_id();

        list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
        struct vmbus_channel *channel = arg;

        list_del(&channel->percpu_list);
}

static void vmbus_release_relid(u32 relid)
{
        struct vmbus_channel_relid_released msg;

        memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
        msg.child_relid = relid;
        msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
        vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}

void hv_event_tasklet_disable(struct vmbus_channel *channel)
{
        struct tasklet_struct *tasklet;

        tasklet = hv_context.event_dpc[channel->target_cpu];
        tasklet_disable(tasklet);
}

void hv_event_tasklet_enable(struct vmbus_channel *channel)
{
        struct tasklet_struct *tasklet;

        tasklet = hv_context.event_dpc[channel->target_cpu];
        tasklet_enable(tasklet);

        /* In case there is any pending event */
        tasklet_schedule(tasklet);
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
        unsigned long flags;
        struct vmbus_channel *primary_channel;

        BUG_ON(!channel->rescind);
        BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

        hv_event_tasklet_disable(channel);
        if (channel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(channel->target_cpu,
                                         percpu_channel_deq, channel, true);
        } else {
                percpu_channel_deq(channel);
                put_cpu();
        }
        hv_event_tasklet_enable(channel);

        if (channel->primary_channel == NULL) {
                list_del(&channel->listentry);

                primary_channel = channel;
        } else {
                primary_channel = channel->primary_channel;
                spin_lock_irqsave(&primary_channel->lock, flags);
                list_del(&channel->sc_list);
                primary_channel->num_sc--;
                spin_unlock_irqrestore(&primary_channel->lock, flags);
        }

        /*
         * We need to free the bit for init_vp_index() to work in the case
         * of sub-channels, when we reload drivers like hv_netvsc.
         */
        if (channel->affinity_policy == HV_LOCALIZED)
                cpumask_clear_cpu(channel->target_cpu,
                                  &primary_channel->alloced_cpus_in_node);

        vmbus_release_relid(relid);

        free_channel(channel);
}

void vmbus_free_channels(void)
{
        struct vmbus_channel *channel, *tmp;

        list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
                listentry) {
                /* hv_process_channel_removal() needs this */
                channel->rescind = true;

                vmbus_device_unregister(channel->device_obj);
        }
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
        struct vmbus_channel *channel;
        bool fnew = true;
        unsigned long flags;
        u16 dev_type;
        int ret;

        /* Make sure this is a new offer */
        mutex_lock(&vmbus_connection.channel_mutex);

        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
                if (!uuid_le_cmp(channel->offermsg.offer.if_type,
                        newchannel->offermsg.offer.if_type) &&
                        !uuid_le_cmp(channel->offermsg.offer.if_instance,
                                newchannel->offermsg.offer.if_instance)) {
                        fnew = false;
                        break;
                }
        }

        if (fnew)
                list_add_tail(&newchannel->listentry,
                              &vmbus_connection.chn_list);

        mutex_unlock(&vmbus_connection.channel_mutex);

        if (!fnew) {
                /*
                 * Check to see if this is a sub-channel.
                 */
                if (newchannel->offermsg.offer.sub_channel_index != 0) {
                        /*
                         * Process the sub-channel.
                         */
                        newchannel->primary_channel = channel;
                        spin_lock_irqsave(&channel->lock, flags);
                        list_add_tail(&newchannel->sc_list, &channel->sc_list);
                        channel->num_sc++;
                        spin_unlock_irqrestore(&channel->lock, flags);
                } else {
                        goto err_free_chan;
                }
        }

        dev_type = hv_get_dev_type(newchannel);
        if (dev_type == HV_NIC)
                set_channel_signal_state(newchannel, HV_SIGNAL_POLICY_EXPLICIT);

        init_vp_index(newchannel, dev_type);

        hv_event_tasklet_disable(newchannel);
        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_enq,
                                         newchannel, true);
        } else {
                percpu_channel_enq(newchannel);
                put_cpu();
        }
        hv_event_tasklet_enable(newchannel);

        /*
         * This state is used to indicate a successful open
         * so that when we do close the channel normally, we
         * can clean up properly.
         */
        newchannel->state = CHANNEL_OPEN_STATE;

        if (!fnew) {
                if (channel->sc_creation_callback != NULL)
                        channel->sc_creation_callback(newchannel);
                return;
        }

        /*
         * Start the process of binding this offer to the driver: we need to
         * set the device_obj field before adding the device to the bus.
         */
        newchannel->device_obj = vmbus_device_create(
                &newchannel->offermsg.offer.if_type,
                &newchannel->offermsg.offer.if_instance,
                newchannel);
        if (!newchannel->device_obj)
                goto err_deq_chan;

        newchannel->device_obj->device_id = dev_type;
        /*
         * Add the new device to the bus. This will kick off device-driver
         * binding which eventually invokes the device driver's AddDevice()
         * method.
         */
        mutex_lock(&vmbus_connection.channel_mutex);
        ret = vmbus_device_register(newchannel->device_obj);
        mutex_unlock(&vmbus_connection.channel_mutex);

        if (ret != 0) {
                pr_err("unable to add child device object (relid %d)\n",
                        newchannel->offermsg.child_relid);
                kfree(newchannel->device_obj);
                goto err_deq_chan;
        }
        return;

err_deq_chan:
        mutex_lock(&vmbus_connection.channel_mutex);
        list_del(&newchannel->listentry);
        mutex_unlock(&vmbus_connection.channel_mutex);

        hv_event_tasklet_disable(newchannel);
        if (newchannel->target_cpu != get_cpu()) {
                put_cpu();
                smp_call_function_single(newchannel->target_cpu,
                                         percpu_channel_deq, newchannel, true);
        } else {
                percpu_channel_deq(newchannel);
                put_cpu();
        }
        hv_event_tasklet_enable(newchannel);

        vmbus_release_relid(newchannel->offermsg.child_relid);

err_free_chan:
        free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 * We do this in a hierarchical fashion:
 * first distribute the primary channels across available NUMA nodes,
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
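
/*
 * For example (illustrative only): in a guest with two NUMA nodes of four
 * VCPUs each, the primary channels of successive performance-critical
 * devices land on alternating nodes, and each device's sub-channels are
 * then spread over the VCPUs of the node chosen for its primary channel,
 * so that no single VCPU services all of a device's interrupts.
 */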
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
        u32 cur_cpu;
        bool perf_chn = vmbus_devs[dev_type].perf_device;
        struct vmbus_channel *primary = channel->primary_channel;
        int next_node;
        struct cpumask available_mask;
        struct cpumask *alloced_mask;

        if ((vmbus_proto_version == VERSION_WS2008) ||
            (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
                /*
                 * Prior to win8, all channel interrupts are
                 * delivered on cpu 0.
                 * Also if the channel is not a performance critical
                 * channel, bind it to cpu 0.
                 */
                channel->numa_node = 0;
                channel->target_cpu = 0;
                channel->target_vp = hv_context.vp_index[0];
                return;
        }

        /*
         * Based on the channel affinity policy, we will assign the NUMA
         * nodes.
         */

        if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
                while (true) {
                        next_node = next_numa_node_id++;
                        if (next_node == nr_node_ids) {
                                next_node = next_numa_node_id = 0;
                                continue;
                        }
                        if (cpumask_empty(cpumask_of_node(next_node)))
                                continue;
                        break;
                }
                channel->numa_node = next_node;
                primary = channel;
        }
        alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

        if (cpumask_weight(alloced_mask) ==
            cpumask_weight(cpumask_of_node(primary->numa_node))) {
                /*
                 * We have cycled through all the CPUs in the node;
                 * reset the alloced map.
                 */
                cpumask_clear(alloced_mask);
        }

        cpumask_xor(&available_mask, alloced_mask,
                    cpumask_of_node(primary->numa_node));

        cur_cpu = -1;

        if (primary->affinity_policy == HV_LOCALIZED) {
                /*
                 * Normally the Hyper-V host doesn't create more subchannels
                 * than there are VCPUs on the node, but it is possible when
                 * not all present VCPUs on the node are initialized by the
                 * guest. Clear alloced_cpus_in_node to start over.
                 */
                if (cpumask_equal(&primary->alloced_cpus_in_node,
                                  cpumask_of_node(primary->numa_node)))
                        cpumask_clear(&primary->alloced_cpus_in_node);
        }

        while (true) {
                cur_cpu = cpumask_next(cur_cpu, &available_mask);
                if (cur_cpu >= nr_cpu_ids) {
                        cur_cpu = -1;
                        cpumask_copy(&available_mask,
                                     cpumask_of_node(primary->numa_node));
                        continue;
                }

                if (primary->affinity_policy == HV_LOCALIZED) {
                        /*
                         * NOTE: in the case of sub-channels, we clear the
                         * sub-channel related bit(s) in
                         * primary->alloced_cpus_in_node in
                         * hv_process_channel_removal(), so when we reload
                         * drivers like hv_netvsc in an SMP guest, we're
                         * able to re-allocate a bit from
                         * primary->alloced_cpus_in_node here.
                         */
                        if (!cpumask_test_cpu(cur_cpu,
                                              &primary->alloced_cpus_in_node)) {
                                cpumask_set_cpu(cur_cpu,
                                                &primary->alloced_cpus_in_node);
                                cpumask_set_cpu(cur_cpu, alloced_mask);
                                break;
                        }
                } else {
                        cpumask_set_cpu(cur_cpu, alloced_mask);
                        break;
                }
        }

        channel->target_cpu = cur_cpu;
        channel->target_vp = hv_context.vp_index[cur_cpu];
}

static void vmbus_wait_for_unload(void)
{
        int cpu;
        void *page_addr;
        struct hv_message *msg;
        struct vmbus_channel_message_header *hdr;
        u32 message_type;

        /*
         * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
         * used for initial contact or to CPU0 depending on host version. When
         * we're crashing on a different CPU, let's hope that the IRQ handler
         * on the CPU which receives CHANNELMSG_UNLOAD_RESPONSE is still
         * functional and vmbus_unload_response() will complete
         * vmbus_connection.unload_event. If not, the last thing we can do is
         * read the message pages for all CPUs directly.
         */
        while (1) {
                if (completion_done(&vmbus_connection.unload_event))
                        break;

                for_each_online_cpu(cpu) {
                        page_addr = hv_context.synic_message_page[cpu];
                        msg = (struct hv_message *)page_addr +
                                VMBUS_MESSAGE_SINT;

                        message_type = READ_ONCE(msg->header.message_type);
                        if (message_type == HVMSG_NONE)
                                continue;

                        hdr = (struct vmbus_channel_message_header *)
                                msg->u.payload;

                        if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
                                complete(&vmbus_connection.unload_event);

                        vmbus_signal_eom(msg, message_type);
                }

                mdelay(10);
        }

        /*
         * We're crashing and already got the UNLOAD_RESPONSE; clean up any
         * pending messages on all CPUs so that we can receive new messages
         * after we reconnect.
         */
        for_each_online_cpu(cpu) {
                page_addr = hv_context.synic_message_page[cpu];
                msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
                msg->header.message_type = HVMSG_NONE;
        }
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
        /*
         * This is a global event; just wake up the waiting thread.
         * Once we successfully unload, we can clean up the monitor state.
         */
        complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
        struct vmbus_channel_message_header hdr;

        /* Pre-Win2012R2 hosts don't support reconnect */
        if (vmbus_proto_version < VERSION_WIN8_1)
                return;

        init_completion(&vmbus_connection.unload_event);
        memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
        hdr.msgtype = CHANNELMSG_UNLOAD;
        vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

        /*
         * vmbus_initiate_unload() is also called on crash, and the crash can
         * happen in interrupt context, where scheduling is impossible.
         */
        if (!crash)
                wait_for_completion(&vmbus_connection.unload_event);
        else
                vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_offer_channel *offer;
        struct vmbus_channel *newchannel;

        offer = (struct vmbus_channel_offer_channel *)hdr;

        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
                pr_err("Unable to allocate channel object\n");
                return;
        }

        /*
         * By default we set up state to enable batched reading.
         * A specific service can choose to disable this prior
         * to opening the channel.
         */
        newchannel->batched_reading = true;

        /*
         * Set up state for signalling the host.
         */
        newchannel->sig_event = (struct hv_input_signal_event *)
                                (ALIGN((unsigned long)
                                &newchannel->sig_buf,
                                HV_HYPERCALL_PARAM_ALIGN));

        newchannel->sig_event->connectionid.asu32 = 0;
        newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
        newchannel->sig_event->flag_number = 0;
        newchannel->sig_event->rsvdz = 0;

        if (vmbus_proto_version != VERSION_WS2008) {
                newchannel->is_dedicated_interrupt =
                                (offer->is_dedicated_interrupt != 0);
                newchannel->sig_event->connectionid.u.id =
                                offer->connection_id;
        }

        memcpy(&newchannel->offermsg, offer,
               sizeof(struct vmbus_channel_offer_channel));
        newchannel->monitor_grp = (u8)offer->monitorid / 32;
        newchannel->monitor_bit = (u8)offer->monitorid % 32;

        vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        unsigned long flags;
        struct device *dev;

        rescind = (struct vmbus_channel_rescind_offer *)hdr;

        mutex_lock(&vmbus_connection.channel_mutex);
        channel = relid2channel(rescind->child_relid);

        if (channel == NULL) {
                /*
                 * This should be impossible, because in
                 * vmbus_process_offer() we have already invoked
                 * vmbus_release_relid() on error.
                 */
                goto out;
        }

        spin_lock_irqsave(&channel->lock, flags);
        channel->rescind = true;
        spin_unlock_irqrestore(&channel->lock, flags);

        if (channel->device_obj) {
                if (channel->chn_rescind_callback) {
                        channel->chn_rescind_callback(channel);
                        goto out;
                }
                /*
                 * We will have to unregister this device from the
                 * driver core.
                 */
                dev = get_device(&channel->device_obj->device);
                if (dev) {
                        vmbus_device_unregister(channel->device_obj);
                        put_device(dev);
                }
        } else {
                hv_process_channel_removal(channel,
                        channel->offermsg.child_relid);
        }

out:
        mutex_unlock(&vmbus_connection.channel_mutex);
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
        mutex_lock(&vmbus_connection.channel_mutex);

        BUG_ON(!is_hvsock_channel(channel));

        channel->rescind = true;
        vmbus_device_unregister(channel->device_obj);

        mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
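
/*
 * Illustrative sketch (hypothetical caller): an hv_sock transport would
 * call this while tearing down the channel backing a socket, e.g.:
 *
 *      static void my_hvsock_release(struct vmbus_channel *chan)
 *      {
 *              vmbus_hvsock_device_unregister(chan);
 *      }
 *
 * Note that the function takes vmbus_connection.channel_mutex itself, so
 * the caller must not already hold it.
 */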

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
                        struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_open_result *result;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_open_channel *openmsg;
        unsigned long flags;

        result = (struct vmbus_channel_open_result *)hdr;

        /*
         * Find the open msg, copy the result and signal/unblock the wait
         * event.
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
                        openmsg =
                        (struct vmbus_channel_open_channel *)msginfo->msg;
                        if (openmsg->child_relid == result->child_relid &&
                            openmsg->openid == result->openid) {
                                memcpy(&msginfo->response.open_result,
                                       result,
                                       sizeof(
                                        struct vmbus_channel_open_result));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_created *gpadlcreated;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_header *gpadlheader;
        unsigned long flags;

        gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

        /*
         * Find the establish msg, copy the result and signal/unblock the
         * wait event.
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
                        gpadlheader =
                        (struct vmbus_channel_gpadl_header *)requestheader;

                        if ((gpadlcreated->child_relid ==
                             gpadlheader->child_relid) &&
                            (gpadlcreated->gpadl == gpadlheader->gpadl)) {
                                memcpy(&msginfo->response.gpadl_created,
                                       gpadlcreated,
                                       sizeof(
                                        struct vmbus_channel_gpadl_created));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
                        struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_gpadl_torndown *gpadl_torndown;
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_gpadl_teardown *gpadl_teardown;
        unsigned long flags;

        gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

        /*
         * Find the teardown msg, copy the result and signal/unblock the wait
         * event.
         */
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
                        gpadl_teardown =
                        (struct vmbus_channel_gpadl_teardown *)requestheader;

                        if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
                                memcpy(&msginfo->response.gpadl_torndown,
                                       gpadl_torndown,
                                       sizeof(
                                        struct vmbus_channel_gpadl_torndown));
                                complete(&msginfo->waitevent);
                                break;
                        }
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler.
 *
 * This is invoked when we receive a response to our initiate contact
 * request. Find the matching request, copy the response and signal the
 * requesting thread.
 */
static void vmbus_onversion_response(
                struct vmbus_channel_message_header *hdr)
{
        struct vmbus_channel_msginfo *msginfo;
        struct vmbus_channel_message_header *requestheader;
        struct vmbus_channel_version_response *version_response;
        unsigned long flags;

        version_response = (struct vmbus_channel_version_response *)hdr;
        spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

        list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
                                msglistentry) {
                requestheader =
                        (struct vmbus_channel_message_header *)msginfo->msg;

                if (requestheader->msgtype ==
                    CHANNELMSG_INITIATE_CONTACT) {
                        memcpy(&msginfo->response.version_response,
                              version_response,
                              sizeof(struct vmbus_channel_version_response));
                        complete(&msginfo->waitevent);
                }
        }
        spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
struct vmbus_channel_message_table_entry
        channel_message_table[CHANNELMSG_COUNT] = {
        {CHANNELMSG_INVALID,                    0, NULL},
        {CHANNELMSG_OFFERCHANNEL,               0, vmbus_onoffer},
        {CHANNELMSG_RESCIND_CHANNELOFFER,       0, vmbus_onoffer_rescind},
        {CHANNELMSG_REQUESTOFFERS,              0, NULL},
        {CHANNELMSG_ALLOFFERS_DELIVERED,        1, vmbus_onoffers_delivered},
        {CHANNELMSG_OPENCHANNEL,                0, NULL},
        {CHANNELMSG_OPENCHANNEL_RESULT,         1, vmbus_onopen_result},
        {CHANNELMSG_CLOSECHANNEL,               0, NULL},
        {CHANNELMSG_GPADL_HEADER,               0, NULL},
        {CHANNELMSG_GPADL_BODY,                 0, NULL},
        {CHANNELMSG_GPADL_CREATED,              1, vmbus_ongpadl_created},
        {CHANNELMSG_GPADL_TEARDOWN,             0, NULL},
        {CHANNELMSG_GPADL_TORNDOWN,             1, vmbus_ongpadl_torndown},
        {CHANNELMSG_RELID_RELEASED,             0, NULL},
        {CHANNELMSG_INITIATE_CONTACT,           0, NULL},
        {CHANNELMSG_VERSION_RESPONSE,           1, vmbus_onversion_response},
        {CHANNELMSG_UNLOAD,                     0, NULL},
        {CHANNELMSG_UNLOAD_RESPONSE,            1, vmbus_unload_response},
        {CHANNELMSG_18,                         0, NULL},
        {CHANNELMSG_19,                         0, NULL},
        {CHANNELMSG_20,                         0, NULL},
        {CHANNELMSG_TL_CONNECT_REQUEST,         0, NULL},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
        struct hv_message *msg = context;
        struct vmbus_channel_message_header *hdr;
        int size;

        hdr = (struct vmbus_channel_message_header *)msg->u.payload;
        size = msg->header.payload_size;

        if (hdr->msgtype >= CHANNELMSG_COUNT) {
                pr_err("Received invalid channel message type %d size %d\n",
                       hdr->msgtype, size);
                print_hex_dump_bytes("", DUMP_PREFIX_NONE,
                                     (unsigned char *)msg->u.payload, size);
                return;
        }

        if (channel_message_table[hdr->msgtype].message_handler)
                channel_message_table[hdr->msgtype].message_handler(hdr);
        else
                pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
        struct vmbus_channel_message_header *msg;
        struct vmbus_channel_msginfo *msginfo;
        int ret;

        msginfo = kmalloc(sizeof(*msginfo) +
                          sizeof(struct vmbus_channel_message_header),
                          GFP_KERNEL);
        if (!msginfo)
                return -ENOMEM;

        msg = (struct vmbus_channel_message_header *)msginfo->msg;

        msg->msgtype = CHANNELMSG_REQUESTOFFERS;

        ret = vmbus_post_msg(msg,
                             sizeof(struct vmbus_channel_message_header));
        if (ret != 0) {
                pr_err("Unable to request offers - %d\n", ret);

                goto cleanup;
        }

cleanup:
        kfree(msginfo);

        return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
        struct list_head *cur, *tmp;
        int cur_cpu;
        struct vmbus_channel *cur_channel;
        struct vmbus_channel *outgoing_channel = primary;
        int next_channel;
        int i = 1;

        if (list_empty(&primary->sc_list))
                return outgoing_channel;

        next_channel = primary->next_oc++;

        if (next_channel > (primary->num_sc)) {
                primary->next_oc = 0;
                return outgoing_channel;
        }

        cur_cpu = hv_context.vp_index[get_cpu()];
        put_cpu();
        list_for_each_safe(cur, tmp, &primary->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
                if (cur_channel->state != CHANNEL_OPENED_STATE)
                        continue;

                if (cur_channel->target_vp == cur_cpu)
                        return cur_channel;

                if (i == next_channel)
                        return cur_channel;

                i++;
        }

        return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
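
/*
 * Illustrative sketch (hypothetical caller): a storage or network driver
 * spreading requests over its sub-channels might do the following, where
 * "device" and "req" are placeholders for the driver's own state:
 *
 *      struct vmbus_channel *outgoing;
 *      int ret;
 *
 *      outgoing = vmbus_get_outgoing_channel(device->channel);
 *      ret = vmbus_sendpacket(outgoing, &req, sizeof(req),
 *                             (unsigned long)&req, VM_PKT_DATA_INBAND,
 *                             VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 */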

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
        struct list_head *cur, *tmp;
        struct vmbus_channel *cur_channel;

        if (primary_channel->sc_creation_callback == NULL)
                return;

        list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
                cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

                primary_channel->sc_creation_callback(cur_channel);
        }
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
                                void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
        primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
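
/*
 * Illustrative sketch (hypothetical caller): a driver that wants
 * sub-channels registers its callback on the primary channel before
 * asking the host to create them:
 *
 *      vmbus_set_sc_create_callback(primary, my_sc_created);
 *      (then send the device-specific "create sub-channels" request)
 *
 * my_sc_created() is invoked once per new sub-channel from
 * vmbus_process_offer() above, and would typically vmbus_open() the
 * new channel.
 */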

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
        bool ret;

        ret = !list_empty(&primary->sc_list);

        if (ret) {
                /*
                 * Invoke the callback on sub-channel creation.
                 * This will present a uniform interface to the
                 * clients.
                 */
                invoke_sc_cb(primary);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
                void (*chn_rescind_cb)(struct vmbus_channel *))
{
        channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);