linux/drivers/hv/vmbus_drv.c
   1/*
   2 * Copyright (c) 2009, Microsoft Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15 * Place - Suite 330, Boston, MA 02111-1307 USA.
  16 *
  17 * Authors:
  18 *   Haiyang Zhang <haiyangz@microsoft.com>
  19 *   Hank Janssen  <hjanssen@microsoft.com>
  20 *   K. Y. Srinivasan <kys@microsoft.com>
  21 *
  22 */
  23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  24
  25#include <linux/init.h>
  26#include <linux/module.h>
  27#include <linux/device.h>
  28#include <linux/interrupt.h>
  29#include <linux/sysctl.h>
  30#include <linux/slab.h>
  31#include <linux/acpi.h>
  32#include <linux/completion.h>
  33#include <linux/hyperv.h>
  34#include <linux/kernel_stat.h>
  35#include <linux/clockchips.h>
  36#include <linux/cpu.h>
  37#include <linux/sched/task_stack.h>
  38
  39#include <asm/mshyperv.h>
  40#include <linux/notifier.h>
  41#include <linux/ptrace.h>
  42#include <linux/screen_info.h>
  43#include <linux/kdebug.h>
  44#include <linux/efi.h>
  45#include <linux/random.h>
  46#include "hyperv_vmbus.h"
  47
  48struct vmbus_dynid {
  49        struct list_head node;
  50        struct hv_vmbus_device_id id;
  51};
  52
  53static struct acpi_device  *hv_acpi_dev;
  54
  55static struct completion probe_event;
  56
  57static int hyperv_cpuhp_online;
  58
  59static void *hv_panic_page;
  60
  61static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
  62                              void *args)
  63{
  64        struct pt_regs *regs;
  65
  66        regs = current_pt_regs();
  67
  68        hyperv_report_panic(regs, val);
  69        return NOTIFY_DONE;
  70}
  71
  72static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
  73                            void *args)
  74{
  75        struct die_args *die = (struct die_args *)args;
  76        struct pt_regs *regs = die->regs;
  77
  78        hyperv_report_panic(regs, val);
  79        return NOTIFY_DONE;
  80}
  81
  82static struct notifier_block hyperv_die_block = {
  83        .notifier_call = hyperv_die_event,
  84};
  85static struct notifier_block hyperv_panic_block = {
  86        .notifier_call = hyperv_panic_event,
  87};
  88
  89static const char *fb_mmio_name = "fb_range";
  90static struct resource *fb_mmio;
  91static struct resource *hyperv_mmio;
  92static DEFINE_SEMAPHORE(hyperv_mmio_lock);
  93
  94static int vmbus_exists(void)
  95{
  96        if (hv_acpi_dev == NULL)
  97                return -ENODEV;
  98
  99        return 0;
 100}
 101
 102#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
 103static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
 104{
 105        int i;
 106        for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
 107                sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
 108}
 109
 110static u8 channel_monitor_group(const struct vmbus_channel *channel)
 111{
 112        return (u8)channel->offermsg.monitorid / 32;
 113}
 114
 115static u8 channel_monitor_offset(const struct vmbus_channel *channel)
 116{
 117        return (u8)channel->offermsg.monitorid % 32;
 118}
 119
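/*
 * For example, a channel with monitorid 37 falls in monitor group
 * 37 / 32 = 1 at offset 37 % 32 = 5, so channel_pending() below reads
 * trigger_group[1].pending and channel_latency() reads latency[1][5].
 */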
 120static u32 channel_pending(const struct vmbus_channel *channel,
 121                           const struct hv_monitor_page *monitor_page)
 122{
 123        u8 monitor_group = channel_monitor_group(channel);
 124
 125        return monitor_page->trigger_group[monitor_group].pending;
 126}
 127
 128static u32 channel_latency(const struct vmbus_channel *channel,
 129                           const struct hv_monitor_page *monitor_page)
 130{
 131        u8 monitor_group = channel_monitor_group(channel);
 132        u8 monitor_offset = channel_monitor_offset(channel);
 133
 134        return monitor_page->latency[monitor_group][monitor_offset];
 135}
 136
 137static u32 channel_conn_id(struct vmbus_channel *channel,
 138                           struct hv_monitor_page *monitor_page)
 139{
 140        u8 monitor_group = channel_monitor_group(channel);
 141        u8 monitor_offset = channel_monitor_offset(channel);
 142        return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
 143}
 144
 145static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 146                       char *buf)
 147{
 148        struct hv_device *hv_dev = device_to_hv_device(dev);
 149
 150        if (!hv_dev->channel)
 151                return -ENODEV;
 152        return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
 153}
 154static DEVICE_ATTR_RO(id);
 155
 156static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
 157                          char *buf)
 158{
 159        struct hv_device *hv_dev = device_to_hv_device(dev);
 160
 161        if (!hv_dev->channel)
 162                return -ENODEV;
 163        return sprintf(buf, "%d\n", hv_dev->channel->state);
 164}
 165static DEVICE_ATTR_RO(state);
 166
 167static ssize_t monitor_id_show(struct device *dev,
 168                               struct device_attribute *dev_attr, char *buf)
 169{
 170        struct hv_device *hv_dev = device_to_hv_device(dev);
 171
 172        if (!hv_dev->channel)
 173                return -ENODEV;
 174        return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
 175}
 176static DEVICE_ATTR_RO(monitor_id);
 177
 178static ssize_t class_id_show(struct device *dev,
 179                               struct device_attribute *dev_attr, char *buf)
 180{
 181        struct hv_device *hv_dev = device_to_hv_device(dev);
 182
 183        if (!hv_dev->channel)
 184                return -ENODEV;
 185        return sprintf(buf, "{%pUl}\n",
 186                       hv_dev->channel->offermsg.offer.if_type.b);
 187}
 188static DEVICE_ATTR_RO(class_id);
 189
 190static ssize_t device_id_show(struct device *dev,
 191                              struct device_attribute *dev_attr, char *buf)
 192{
 193        struct hv_device *hv_dev = device_to_hv_device(dev);
 194
 195        if (!hv_dev->channel)
 196                return -ENODEV;
 197        return sprintf(buf, "{%pUl}\n",
 198                       hv_dev->channel->offermsg.offer.if_instance.b);
 199}
 200static DEVICE_ATTR_RO(device_id);
 201
 202static ssize_t modalias_show(struct device *dev,
 203                             struct device_attribute *dev_attr, char *buf)
 204{
 205        struct hv_device *hv_dev = device_to_hv_device(dev);
 206        char alias_name[VMBUS_ALIAS_LEN + 1];
 207
 208        print_alias_name(hv_dev, alias_name);
 209        return sprintf(buf, "vmbus:%s\n", alias_name);
 210}
 211static DEVICE_ATTR_RO(modalias);
 212
 213#ifdef CONFIG_NUMA
 214static ssize_t numa_node_show(struct device *dev,
 215                              struct device_attribute *attr, char *buf)
 216{
 217        struct hv_device *hv_dev = device_to_hv_device(dev);
 218
 219        if (!hv_dev->channel)
 220                return -ENODEV;
 221
 222        return sprintf(buf, "%d\n", hv_dev->channel->numa_node);
 223}
 224static DEVICE_ATTR_RO(numa_node);
 225#endif
 226
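/*
 * Convention for the per-device monitor attributes below: the server_*
 * files read from vmbus_connection.monitor_pages[0] and the client_*
 * files from monitor_pages[1], the two pages shared with the host when
 * the connection is established.
 */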
 227static ssize_t server_monitor_pending_show(struct device *dev,
 228                                           struct device_attribute *dev_attr,
 229                                           char *buf)
 230{
 231        struct hv_device *hv_dev = device_to_hv_device(dev);
 232
 233        if (!hv_dev->channel)
 234                return -ENODEV;
 235        return sprintf(buf, "%d\n",
 236                       channel_pending(hv_dev->channel,
  237                                       vmbus_connection.monitor_pages[0]));
 238}
 239static DEVICE_ATTR_RO(server_monitor_pending);
 240
 241static ssize_t client_monitor_pending_show(struct device *dev,
 242                                           struct device_attribute *dev_attr,
 243                                           char *buf)
 244{
 245        struct hv_device *hv_dev = device_to_hv_device(dev);
 246
 247        if (!hv_dev->channel)
 248                return -ENODEV;
 249        return sprintf(buf, "%d\n",
 250                       channel_pending(hv_dev->channel,
 251                                       vmbus_connection.monitor_pages[1]));
 252}
 253static DEVICE_ATTR_RO(client_monitor_pending);
 254
 255static ssize_t server_monitor_latency_show(struct device *dev,
 256                                           struct device_attribute *dev_attr,
 257                                           char *buf)
 258{
 259        struct hv_device *hv_dev = device_to_hv_device(dev);
 260
 261        if (!hv_dev->channel)
 262                return -ENODEV;
 263        return sprintf(buf, "%d\n",
 264                       channel_latency(hv_dev->channel,
 265                                       vmbus_connection.monitor_pages[0]));
 266}
 267static DEVICE_ATTR_RO(server_monitor_latency);
 268
 269static ssize_t client_monitor_latency_show(struct device *dev,
 270                                           struct device_attribute *dev_attr,
 271                                           char *buf)
 272{
 273        struct hv_device *hv_dev = device_to_hv_device(dev);
 274
 275        if (!hv_dev->channel)
 276                return -ENODEV;
 277        return sprintf(buf, "%d\n",
 278                       channel_latency(hv_dev->channel,
 279                                       vmbus_connection.monitor_pages[1]));
 280}
 281static DEVICE_ATTR_RO(client_monitor_latency);
 282
 283static ssize_t server_monitor_conn_id_show(struct device *dev,
 284                                           struct device_attribute *dev_attr,
 285                                           char *buf)
 286{
 287        struct hv_device *hv_dev = device_to_hv_device(dev);
 288
 289        if (!hv_dev->channel)
 290                return -ENODEV;
 291        return sprintf(buf, "%d\n",
 292                       channel_conn_id(hv_dev->channel,
 293                                       vmbus_connection.monitor_pages[0]));
 294}
 295static DEVICE_ATTR_RO(server_monitor_conn_id);
 296
 297static ssize_t client_monitor_conn_id_show(struct device *dev,
 298                                           struct device_attribute *dev_attr,
 299                                           char *buf)
 300{
 301        struct hv_device *hv_dev = device_to_hv_device(dev);
 302
 303        if (!hv_dev->channel)
 304                return -ENODEV;
 305        return sprintf(buf, "%d\n",
 306                       channel_conn_id(hv_dev->channel,
 307                                       vmbus_connection.monitor_pages[1]));
 308}
 309static DEVICE_ATTR_RO(client_monitor_conn_id);
 310
 311static ssize_t out_intr_mask_show(struct device *dev,
 312                                  struct device_attribute *dev_attr, char *buf)
 313{
 314        struct hv_device *hv_dev = device_to_hv_device(dev);
 315        struct hv_ring_buffer_debug_info outbound;
 316
 317        if (!hv_dev->channel)
 318                return -ENODEV;
 319        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
 320        return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
 321}
 322static DEVICE_ATTR_RO(out_intr_mask);
 323
 324static ssize_t out_read_index_show(struct device *dev,
 325                                   struct device_attribute *dev_attr, char *buf)
 326{
 327        struct hv_device *hv_dev = device_to_hv_device(dev);
 328        struct hv_ring_buffer_debug_info outbound;
 329
 330        if (!hv_dev->channel)
 331                return -ENODEV;
 332        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
 333        return sprintf(buf, "%d\n", outbound.current_read_index);
 334}
 335static DEVICE_ATTR_RO(out_read_index);
 336
 337static ssize_t out_write_index_show(struct device *dev,
 338                                    struct device_attribute *dev_attr,
 339                                    char *buf)
 340{
 341        struct hv_device *hv_dev = device_to_hv_device(dev);
 342        struct hv_ring_buffer_debug_info outbound;
 343
 344        if (!hv_dev->channel)
 345                return -ENODEV;
 346        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
 347        return sprintf(buf, "%d\n", outbound.current_write_index);
 348}
 349static DEVICE_ATTR_RO(out_write_index);
 350
 351static ssize_t out_read_bytes_avail_show(struct device *dev,
 352                                         struct device_attribute *dev_attr,
 353                                         char *buf)
 354{
 355        struct hv_device *hv_dev = device_to_hv_device(dev);
 356        struct hv_ring_buffer_debug_info outbound;
 357
 358        if (!hv_dev->channel)
 359                return -ENODEV;
 360        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
 361        return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
 362}
 363static DEVICE_ATTR_RO(out_read_bytes_avail);
 364
 365static ssize_t out_write_bytes_avail_show(struct device *dev,
 366                                          struct device_attribute *dev_attr,
 367                                          char *buf)
 368{
 369        struct hv_device *hv_dev = device_to_hv_device(dev);
 370        struct hv_ring_buffer_debug_info outbound;
 371
 372        if (!hv_dev->channel)
 373                return -ENODEV;
 374        hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
 375        return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
 376}
 377static DEVICE_ATTR_RO(out_write_bytes_avail);
 378
 379static ssize_t in_intr_mask_show(struct device *dev,
 380                                 struct device_attribute *dev_attr, char *buf)
 381{
 382        struct hv_device *hv_dev = device_to_hv_device(dev);
 383        struct hv_ring_buffer_debug_info inbound;
 384
 385        if (!hv_dev->channel)
 386                return -ENODEV;
 387        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
 388        return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
 389}
 390static DEVICE_ATTR_RO(in_intr_mask);
 391
 392static ssize_t in_read_index_show(struct device *dev,
 393                                  struct device_attribute *dev_attr, char *buf)
 394{
 395        struct hv_device *hv_dev = device_to_hv_device(dev);
 396        struct hv_ring_buffer_debug_info inbound;
 397
 398        if (!hv_dev->channel)
 399                return -ENODEV;
 400        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
 401        return sprintf(buf, "%d\n", inbound.current_read_index);
 402}
 403static DEVICE_ATTR_RO(in_read_index);
 404
 405static ssize_t in_write_index_show(struct device *dev,
 406                                   struct device_attribute *dev_attr, char *buf)
 407{
 408        struct hv_device *hv_dev = device_to_hv_device(dev);
 409        struct hv_ring_buffer_debug_info inbound;
 410
 411        if (!hv_dev->channel)
 412                return -ENODEV;
 413        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
 414        return sprintf(buf, "%d\n", inbound.current_write_index);
 415}
 416static DEVICE_ATTR_RO(in_write_index);
 417
 418static ssize_t in_read_bytes_avail_show(struct device *dev,
 419                                        struct device_attribute *dev_attr,
 420                                        char *buf)
 421{
 422        struct hv_device *hv_dev = device_to_hv_device(dev);
 423        struct hv_ring_buffer_debug_info inbound;
 424
 425        if (!hv_dev->channel)
 426                return -ENODEV;
 427        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
 428        return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
 429}
 430static DEVICE_ATTR_RO(in_read_bytes_avail);
 431
 432static ssize_t in_write_bytes_avail_show(struct device *dev,
 433                                         struct device_attribute *dev_attr,
 434                                         char *buf)
 435{
 436        struct hv_device *hv_dev = device_to_hv_device(dev);
 437        struct hv_ring_buffer_debug_info inbound;
 438
 439        if (!hv_dev->channel)
 440                return -ENODEV;
 441        hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
 442        return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
 443}
 444static DEVICE_ATTR_RO(in_write_bytes_avail);
 445
 446static ssize_t channel_vp_mapping_show(struct device *dev,
 447                                       struct device_attribute *dev_attr,
 448                                       char *buf)
 449{
 450        struct hv_device *hv_dev = device_to_hv_device(dev);
 451        struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
 452        unsigned long flags;
 453        int buf_size = PAGE_SIZE, n_written, tot_written;
 454        struct list_head *cur;
 455
 456        if (!channel)
 457                return -ENODEV;
 458
 459        tot_written = snprintf(buf, buf_size, "%u:%u\n",
 460                channel->offermsg.child_relid, channel->target_cpu);
 461
 462        spin_lock_irqsave(&channel->lock, flags);
 463
 464        list_for_each(cur, &channel->sc_list) {
 465                if (tot_written >= buf_size - 1)
 466                        break;
 467
 468                cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
 469                n_written = scnprintf(buf + tot_written,
 470                                     buf_size - tot_written,
 471                                     "%u:%u\n",
 472                                     cur_sc->offermsg.child_relid,
 473                                     cur_sc->target_cpu);
 474                tot_written += n_written;
 475        }
 476
 477        spin_unlock_irqrestore(&channel->lock, flags);
 478
 479        return tot_written;
 480}
 481static DEVICE_ATTR_RO(channel_vp_mapping);
 482
 483static ssize_t vendor_show(struct device *dev,
 484                           struct device_attribute *dev_attr,
 485                           char *buf)
 486{
 487        struct hv_device *hv_dev = device_to_hv_device(dev);
 488        return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
 489}
 490static DEVICE_ATTR_RO(vendor);
 491
 492static ssize_t device_show(struct device *dev,
 493                           struct device_attribute *dev_attr,
 494                           char *buf)
 495{
 496        struct hv_device *hv_dev = device_to_hv_device(dev);
 497        return sprintf(buf, "0x%x\n", hv_dev->device_id);
 498}
 499static DEVICE_ATTR_RO(device);
 500
 501/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 502static struct attribute *vmbus_dev_attrs[] = {
 503        &dev_attr_id.attr,
 504        &dev_attr_state.attr,
 505        &dev_attr_monitor_id.attr,
 506        &dev_attr_class_id.attr,
 507        &dev_attr_device_id.attr,
 508        &dev_attr_modalias.attr,
 509#ifdef CONFIG_NUMA
 510        &dev_attr_numa_node.attr,
 511#endif
 512        &dev_attr_server_monitor_pending.attr,
 513        &dev_attr_client_monitor_pending.attr,
 514        &dev_attr_server_monitor_latency.attr,
 515        &dev_attr_client_monitor_latency.attr,
 516        &dev_attr_server_monitor_conn_id.attr,
 517        &dev_attr_client_monitor_conn_id.attr,
 518        &dev_attr_out_intr_mask.attr,
 519        &dev_attr_out_read_index.attr,
 520        &dev_attr_out_write_index.attr,
 521        &dev_attr_out_read_bytes_avail.attr,
 522        &dev_attr_out_write_bytes_avail.attr,
 523        &dev_attr_in_intr_mask.attr,
 524        &dev_attr_in_read_index.attr,
 525        &dev_attr_in_write_index.attr,
 526        &dev_attr_in_read_bytes_avail.attr,
 527        &dev_attr_in_write_bytes_avail.attr,
 528        &dev_attr_channel_vp_mapping.attr,
 529        &dev_attr_vendor.attr,
 530        &dev_attr_device.attr,
 531        NULL,
 532};
 533ATTRIBUTE_GROUPS(vmbus_dev);
 534
 535/*
 536 * vmbus_uevent - add uevent for our device
 537 *
 538 * This routine is invoked when a device is added or removed on the vmbus to
  539 * generate a uevent to udev in userspace. Udev will then look at its
  540 * rules and the uevent generated here to load the appropriate driver.
  541 *
  542 * The alias string will be of the form vmbus:guid where guid is the string
  543 * representation of the device guid (each byte of the guid will be
  544 * represented with two hex characters).
 545 */
 546static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
 547{
 548        struct hv_device *dev = device_to_hv_device(device);
 549        int ret;
 550        char alias_name[VMBUS_ALIAS_LEN + 1];
 551
 552        print_alias_name(dev, alias_name);
 553        ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
 554        return ret;
 555}
 556
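/*
 * For example, using the well-known synthetic-network class GUID
 * f8615163-df3e-46c5-913f-f2d2f965ed0e purely as an illustration, the
 * generated uevent would carry:
 *
 *   MODALIAS=vmbus:635161f83edfc546913ff2d2f965ed0e
 *
 * print_alias_name() emits the 16 GUID bytes in storage order, so the
 * little-endian fields appear byte-swapped relative to the canonical
 * string form.
 */
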
 557static const uuid_le null_guid;
 558
 559static inline bool is_null_guid(const uuid_le *guid)
 560{
 561        if (uuid_le_cmp(*guid, null_guid))
 562                return false;
 563        return true;
 564}
 565
 566/*
 567 * Return a matching hv_vmbus_device_id pointer.
 568 * If there is no match, return NULL.
 569 */
 570static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
 571                                        const uuid_le *guid)
 572{
 573        const struct hv_vmbus_device_id *id = NULL;
 574        struct vmbus_dynid *dynid;
 575
 576        /* Look at the dynamic ids first, before the static ones */
 577        spin_lock(&drv->dynids.lock);
 578        list_for_each_entry(dynid, &drv->dynids.list, node) {
 579                if (!uuid_le_cmp(dynid->id.guid, *guid)) {
 580                        id = &dynid->id;
 581                        break;
 582                }
 583        }
 584        spin_unlock(&drv->dynids.lock);
 585
 586        if (id)
 587                return id;
 588
 589        id = drv->id_table;
 590        if (id == NULL)
 591                return NULL; /* empty device table */
 592
 593        for (; !is_null_guid(&id->guid); id++)
 594                if (!uuid_le_cmp(id->guid, *guid))
 595                        return id;
 596
 597        return NULL;
 598}
 599
 600/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
 601static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
 602{
 603        struct vmbus_dynid *dynid;
 604
 605        dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
 606        if (!dynid)
 607                return -ENOMEM;
 608
 609        dynid->id.guid = *guid;
 610
 611        spin_lock(&drv->dynids.lock);
 612        list_add_tail(&dynid->node, &drv->dynids.list);
 613        spin_unlock(&drv->dynids.lock);
 614
 615        return driver_attach(&drv->driver);
 616}
 617
 618static void vmbus_free_dynids(struct hv_driver *drv)
 619{
 620        struct vmbus_dynid *dynid, *n;
 621
 622        spin_lock(&drv->dynids.lock);
 623        list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
 624                list_del(&dynid->node);
 625                kfree(dynid);
 626        }
 627        spin_unlock(&drv->dynids.lock);
 628}
 629
 630/*
 631 * store_new_id - sysfs frontend to vmbus_add_dynid()
 632 *
 633 * Allow GUIDs to be added to an existing driver via sysfs.
 634 */
 635static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 636                            size_t count)
 637{
 638        struct hv_driver *drv = drv_to_hv_drv(driver);
 639        uuid_le guid;
 640        ssize_t retval;
 641
 642        retval = uuid_le_to_bin(buf, &guid);
 643        if (retval)
 644                return retval;
 645
 646        if (hv_vmbus_get_id(drv, &guid))
 647                return -EEXIST;
 648
 649        retval = vmbus_add_dynid(drv, &guid);
 650        if (retval)
 651                return retval;
 652        return count;
 653}
 654static DRIVER_ATTR_WO(new_id);
 655
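/*
 * Example from user space (the hv_netvsc driver name is used purely as
 * an illustration):
 *
 *   echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" > \
 *           /sys/bus/vmbus/drivers/hv_netvsc/new_id
 *
 * uuid_le_to_bin() parses the canonical GUID string form; on success,
 * driver_attach() re-probes the driver against all unbound devices.
 */
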
 656/*
  657 * store_remove_id - remove a device ID from this driver
  658 *
  659 * Removes a dynamic vmbus device ID from this driver.
 660 */
 661static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
 662                               size_t count)
 663{
 664        struct hv_driver *drv = drv_to_hv_drv(driver);
 665        struct vmbus_dynid *dynid, *n;
 666        uuid_le guid;
 667        ssize_t retval;
 668
 669        retval = uuid_le_to_bin(buf, &guid);
 670        if (retval)
 671                return retval;
 672
 673        retval = -ENODEV;
 674        spin_lock(&drv->dynids.lock);
 675        list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
 676                struct hv_vmbus_device_id *id = &dynid->id;
 677
 678                if (!uuid_le_cmp(id->guid, guid)) {
 679                        list_del(&dynid->node);
 680                        kfree(dynid);
 681                        retval = count;
 682                        break;
 683                }
 684        }
 685        spin_unlock(&drv->dynids.lock);
 686
 687        return retval;
 688}
 689static DRIVER_ATTR_WO(remove_id);
 690
 691static struct attribute *vmbus_drv_attrs[] = {
 692        &driver_attr_new_id.attr,
 693        &driver_attr_remove_id.attr,
 694        NULL,
 695};
 696ATTRIBUTE_GROUPS(vmbus_drv);
 697
 698
 699/*
 700 * vmbus_match - Attempt to match the specified device to the specified driver
 701 */
 702static int vmbus_match(struct device *device, struct device_driver *driver)
 703{
 704        struct hv_driver *drv = drv_to_hv_drv(driver);
 705        struct hv_device *hv_dev = device_to_hv_device(device);
 706
 707        /* The hv_sock driver handles all hv_sock offers. */
 708        if (is_hvsock_channel(hv_dev->channel))
 709                return drv->hvsock;
 710
 711        if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
 712                return 1;
 713
 714        return 0;
 715}
 716
 717/*
 718 * vmbus_probe - Add the new vmbus's child device
 719 */
 720static int vmbus_probe(struct device *child_device)
 721{
 722        int ret = 0;
 723        struct hv_driver *drv =
 724                        drv_to_hv_drv(child_device->driver);
 725        struct hv_device *dev = device_to_hv_device(child_device);
 726        const struct hv_vmbus_device_id *dev_id;
 727
 728        dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
 729        if (drv->probe) {
 730                ret = drv->probe(dev, dev_id);
 731                if (ret != 0)
 732                        pr_err("probe failed for device %s (%d)\n",
 733                               dev_name(child_device), ret);
 734
 735        } else {
 736                pr_err("probe not set for driver %s\n",
 737                       dev_name(child_device));
 738                ret = -ENODEV;
 739        }
 740        return ret;
 741}
 742
 743/*
 744 * vmbus_remove - Remove a vmbus device
 745 */
 746static int vmbus_remove(struct device *child_device)
 747{
 748        struct hv_driver *drv;
 749        struct hv_device *dev = device_to_hv_device(child_device);
 750
 751        if (child_device->driver) {
 752                drv = drv_to_hv_drv(child_device->driver);
 753                if (drv->remove)
 754                        drv->remove(dev);
 755        }
 756
 757        return 0;
 758}
 759
 760
 761/*
 762 * vmbus_shutdown - Shutdown a vmbus device
 763 */
 764static void vmbus_shutdown(struct device *child_device)
 765{
 766        struct hv_driver *drv;
 767        struct hv_device *dev = device_to_hv_device(child_device);
 768
 769
 770        /* The device may not be attached yet */
 771        if (!child_device->driver)
 772                return;
 773
 774        drv = drv_to_hv_drv(child_device->driver);
 775
 776        if (drv->shutdown)
 777                drv->shutdown(dev);
 778}
 779
 780
 781/*
 782 * vmbus_device_release - Final callback release of the vmbus child device
 783 */
 784static void vmbus_device_release(struct device *device)
 785{
 786        struct hv_device *hv_dev = device_to_hv_device(device);
 787        struct vmbus_channel *channel = hv_dev->channel;
 788
 789        mutex_lock(&vmbus_connection.channel_mutex);
 790        hv_process_channel_removal(channel->offermsg.child_relid);
 791        mutex_unlock(&vmbus_connection.channel_mutex);
 792        kfree(hv_dev);
 793
 794}
 795
 796/* The one and only one */
 797static struct bus_type  hv_bus = {
 798        .name =         "vmbus",
 799        .match =                vmbus_match,
 800        .shutdown =             vmbus_shutdown,
 801        .remove =               vmbus_remove,
 802        .probe =                vmbus_probe,
 803        .uevent =               vmbus_uevent,
 804        .dev_groups =           vmbus_dev_groups,
 805        .drv_groups =           vmbus_drv_groups,
 806};
 807
 808struct onmessage_work_context {
 809        struct work_struct work;
 810        struct hv_message msg;
 811};
 812
 813static void vmbus_onmessage_work(struct work_struct *work)
 814{
 815        struct onmessage_work_context *ctx;
 816
 817        /* Do not process messages if we're in DISCONNECTED state */
 818        if (vmbus_connection.conn_state == DISCONNECTED)
 819                return;
 820
 821        ctx = container_of(work, struct onmessage_work_context,
 822                           work);
 823        vmbus_onmessage(&ctx->msg);
 824        kfree(ctx);
 825}
 826
 827static void hv_process_timer_expiration(struct hv_message *msg,
 828                                        struct hv_per_cpu_context *hv_cpu)
 829{
 830        struct clock_event_device *dev = hv_cpu->clk_evt;
 831
 832        if (dev->event_handler)
 833                dev->event_handler(dev);
 834
 835        vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
 836}
 837
 838void vmbus_on_msg_dpc(unsigned long data)
 839{
 840        struct hv_per_cpu_context *hv_cpu = (void *)data;
 841        void *page_addr = hv_cpu->synic_message_page;
 842        struct hv_message *msg = (struct hv_message *)page_addr +
 843                                  VMBUS_MESSAGE_SINT;
 844        struct vmbus_channel_message_header *hdr;
 845        const struct vmbus_channel_message_table_entry *entry;
 846        struct onmessage_work_context *ctx;
 847        u32 message_type = msg->header.message_type;
 848
 849        if (message_type == HVMSG_NONE)
 850                /* no msg */
 851                return;
 852
 853        hdr = (struct vmbus_channel_message_header *)msg->u.payload;
 854
 855        trace_vmbus_on_msg_dpc(hdr);
 856
 857        if (hdr->msgtype >= CHANNELMSG_COUNT) {
 858                WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
 859                goto msg_handled;
 860        }
 861
 862        entry = &channel_message_table[hdr->msgtype];
 863        if (entry->handler_type == VMHT_BLOCKING) {
 864                ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
 865                if (ctx == NULL)
 866                        return;
 867
 868                INIT_WORK(&ctx->work, vmbus_onmessage_work);
 869                memcpy(&ctx->msg, msg, sizeof(*msg));
 870
 871                /*
 872                 * The host can generate a rescind message while we
 873                 * may still be handling the original offer. We deal with
 874                 * this condition by ensuring the processing is done on the
 875                 * same CPU.
 876                 */
 877                switch (hdr->msgtype) {
 878                case CHANNELMSG_RESCIND_CHANNELOFFER:
 879                        /*
  880                         * If we are handling the rescind message,
 881                         * schedule the work on the global work queue.
 882                         */
 883                        schedule_work_on(vmbus_connection.connect_cpu,
 884                                         &ctx->work);
 885                        break;
 886
 887                case CHANNELMSG_OFFERCHANNEL:
 888                        atomic_inc(&vmbus_connection.offer_in_progress);
 889                        queue_work_on(vmbus_connection.connect_cpu,
 890                                      vmbus_connection.work_queue,
 891                                      &ctx->work);
 892                        break;
 893
 894                default:
 895                        queue_work(vmbus_connection.work_queue, &ctx->work);
 896                }
 897        } else
 898                entry->message_handler(hdr);
 899
 900msg_handled:
 901        vmbus_signal_eom(msg, message_type);
 902}
 903
 904
 905/*
 906 * Direct callback for channels using other deferred processing
 907 */
 908static void vmbus_channel_isr(struct vmbus_channel *channel)
 909{
 910        void (*callback_fn)(void *);
 911
 912        callback_fn = READ_ONCE(channel->onchannel_callback);
 913        if (likely(callback_fn != NULL))
 914                (*callback_fn)(channel->channel_callback_context);
 915}
 916
 917/*
 918 * Schedule all channels with events pending
 919 */
 920static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
 921{
 922        unsigned long *recv_int_page;
 923        u32 maxbits, relid;
 924
 925        if (vmbus_proto_version < VERSION_WIN8) {
 926                maxbits = MAX_NUM_CHANNELS_SUPPORTED;
 927                recv_int_page = vmbus_connection.recv_int_page;
 928        } else {
 929                /*
 930                 * When the host is win8 and beyond, the event page
 931                 * can be directly checked to get the id of the channel
 932                 * that has the interrupt pending.
 933                 */
 934                void *page_addr = hv_cpu->synic_event_page;
 935                union hv_synic_event_flags *event
 936                        = (union hv_synic_event_flags *)page_addr +
 937                                                 VMBUS_MESSAGE_SINT;
 938
 939                maxbits = HV_EVENT_FLAGS_COUNT;
 940                recv_int_page = event->flags;
 941        }
 942
 943        if (unlikely(!recv_int_page))
 944                return;
 945
 946        for_each_set_bit(relid, recv_int_page, maxbits) {
 947                struct vmbus_channel *channel;
 948
 949                if (!sync_test_and_clear_bit(relid, recv_int_page))
 950                        continue;
 951
 952                /* Special case - vmbus channel protocol msg */
 953                if (relid == 0)
 954                        continue;
 955
 956                rcu_read_lock();
 957
 958                /* Find channel based on relid */
 959                list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) {
 960                        if (channel->offermsg.child_relid != relid)
 961                                continue;
 962
 963                        if (channel->rescind)
 964                                continue;
 965
 966                        trace_vmbus_chan_sched(channel);
 967
 968                        ++channel->interrupts;
 969
 970                        switch (channel->callback_mode) {
 971                        case HV_CALL_ISR:
 972                                vmbus_channel_isr(channel);
 973                                break;
 974
 975                        case HV_CALL_BATCHED:
 976                                hv_begin_read(&channel->inbound);
 977                                /* fallthrough */
 978                        case HV_CALL_DIRECT:
 979                                tasklet_schedule(&channel->callback_event);
 980                        }
 981                }
 982
 983                rcu_read_unlock();
 984        }
 985}
 986
 987static void vmbus_isr(void)
 988{
 989        struct hv_per_cpu_context *hv_cpu
 990                = this_cpu_ptr(hv_context.cpu_context);
 991        void *page_addr = hv_cpu->synic_event_page;
 992        struct hv_message *msg;
 993        union hv_synic_event_flags *event;
 994        bool handled = false;
 995
 996        if (unlikely(page_addr == NULL))
 997                return;
 998
 999        event = (union hv_synic_event_flags *)page_addr +
1000                                         VMBUS_MESSAGE_SINT;
1001        /*
1002         * Check for events before checking for messages. This is the order
1003         * in which events and messages are checked in Windows guests on
1004         * Hyper-V, and the Windows team suggested we do the same.
1005         */
1006
1007        if ((vmbus_proto_version == VERSION_WS2008) ||
1008                (vmbus_proto_version == VERSION_WIN7)) {
1009
1010                /* Since we are a child, we only need to check bit 0 */
1011                if (sync_test_and_clear_bit(0, event->flags))
1012                        handled = true;
1013        } else {
1014                /*
1015                 * Our host is win8 or above. The signaling mechanism
1016                 * has changed and we can directly look at the event page.
 1017                 * If bit n is set then we have an interrupt on the channel
1018                 * whose id is n.
1019                 */
1020                handled = true;
1021        }
1022
1023        if (handled)
1024                vmbus_chan_sched(hv_cpu);
1025
1026        page_addr = hv_cpu->synic_message_page;
1027        msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
1028
1029        /* Check if there are actual msgs to be processed */
1030        if (msg->header.message_type != HVMSG_NONE) {
1031                if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
1032                        hv_process_timer_expiration(msg, hv_cpu);
1033                else
1034                        tasklet_schedule(&hv_cpu->msg_dpc);
1035        }
1036
1037        add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
1038}
1039
1040/*
1041 * Boolean to control whether to report panic messages over Hyper-V.
1042 *
 1043 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
1044 */
1045static int sysctl_record_panic_msg = 1;
1046
1047/*
1048 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
1049 * buffer and call into Hyper-V to transfer the data.
1050 */
1051static void hv_kmsg_dump(struct kmsg_dumper *dumper,
1052                         enum kmsg_dump_reason reason)
1053{
1054        size_t bytes_written;
1055        phys_addr_t panic_pa;
1056
1057        /* We are only interested in panics. */
1058        if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
1059                return;
1060
1061        panic_pa = virt_to_phys(hv_panic_page);
1062
1063        /*
1064         * Write dump contents to the page. No need to synchronize; panic should
1065         * be single-threaded.
1066         */
1067        kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE,
1068                             &bytes_written);
1069        if (bytes_written)
1070                hyperv_report_panic_msg(panic_pa, bytes_written);
1071}
1072
1073static struct kmsg_dumper hv_kmsg_dumper = {
1074        .dump = hv_kmsg_dump,
1075};
1076
1077static struct ctl_table_header *hv_ctl_table_hdr;
1078static int zero;
1079static int one = 1;
1080
1081/*
1082 * sysctl option to allow the user to control whether kmsg data should be
1083 * reported to Hyper-V on panic.
1084 */
1085static struct ctl_table hv_ctl_table[] = {
1086        {
1087                .procname       = "hyperv_record_panic_msg",
1088                .data           = &sysctl_record_panic_msg,
1089                .maxlen         = sizeof(int),
1090                .mode           = 0644,
1091                .proc_handler   = proc_dointvec_minmax,
1092                .extra1         = &zero,
1093                .extra2         = &one
1094        },
1095        {}
1096};
1097
1098static struct ctl_table hv_root_table[] = {
1099        {
1100                .procname       = "kernel",
1101                .mode           = 0555,
1102                .child          = hv_ctl_table
1103        },
1104        {}
1105};
1106
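/*
 * With the tables above registered, reporting can be toggled from user
 * space, e.g.:
 *
 *   echo 0 > /proc/sys/kernel/hyperv_record_panic_msg
 *
 * proc_dointvec_minmax() rejects writes outside the 0..1 range allowed
 * by extra1/extra2.
 */
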
1107/*
 1108 * vmbus_bus_init - Main vmbus driver initialization routine.
1109 *
1110 * Here, we
1111 *      - initialize the vmbus driver context
1112 *      - invoke the vmbus hv main init routine
1113 *      - retrieve the channel offers
1114 */
1115static int vmbus_bus_init(void)
1116{
1117        int ret;
1118
 1119        /* Hypervisor initialization: set up the hypercall page, etc. */
1120        ret = hv_init();
1121        if (ret != 0) {
1122                pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
1123                return ret;
1124        }
1125
1126        ret = bus_register(&hv_bus);
1127        if (ret)
1128                return ret;
1129
1130        hv_setup_vmbus_irq(vmbus_isr);
1131
1132        ret = hv_synic_alloc();
1133        if (ret)
1134                goto err_alloc;
1135        /*
1136         * Initialize the per-cpu interrupt state and
1137         * connect to the host.
1138         */
1139        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
1140                                hv_synic_init, hv_synic_cleanup);
1141        if (ret < 0)
1142                goto err_alloc;
1143        hyperv_cpuhp_online = ret;
1144
1145        ret = vmbus_connect();
1146        if (ret)
1147                goto err_connect;
1148
1149        /*
1150         * Only register if the crash MSRs are available
1151         */
1152        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
1153                u64 hyperv_crash_ctl;
1154                /*
1155                 * Sysctl registration is not fatal, since by default
1156                 * reporting is enabled.
1157                 */
1158                hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
1159                if (!hv_ctl_table_hdr)
 1160                        pr_err("Hyper-V: sysctl table register error\n");
1161
1162                /*
1163                 * Register for panic kmsg callback only if the right
1164                 * capability is supported by the hypervisor.
1165                 */
1166                hv_get_crash_ctl(hyperv_crash_ctl);
1167                if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
1168                        hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL);
1169                        if (hv_panic_page) {
1170                                ret = kmsg_dump_register(&hv_kmsg_dumper);
1171                                if (ret)
1172                                        pr_err("Hyper-V: kmsg dump register "
1173                                                "error 0x%x\n", ret);
1174                        } else
1175                                pr_err("Hyper-V: panic message page memory "
 1176                                        "allocation failed\n");
1177                }
1178
1179                register_die_notifier(&hyperv_die_block);
1180                atomic_notifier_chain_register(&panic_notifier_list,
1181                                               &hyperv_panic_block);
1182        }
1183
1184        vmbus_request_offers();
1185
1186        return 0;
1187
1188err_connect:
1189        cpuhp_remove_state(hyperv_cpuhp_online);
1190err_alloc:
1191        hv_synic_free();
1192        hv_remove_vmbus_irq();
1193
1194        bus_unregister(&hv_bus);
1195        free_page((unsigned long)hv_panic_page);
1196        unregister_sysctl_table(hv_ctl_table_hdr);
1197        hv_ctl_table_hdr = NULL;
1198        return ret;
1199}
1200
1201/**
 1202 * __vmbus_driver_register() - Register a vmbus driver
1203 * @hv_driver: Pointer to driver structure you want to register
1204 * @owner: owner module of the drv
1205 * @mod_name: module name string
1206 *
 1207 * Registers the given driver with Linux through the 'driver_register()' call
 1208 * and sets up the Hyper-V vmbus handling for this driver.
 1209 * It returns the result of the 'driver_register()' call.
1210 *
1211 */
1212int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
1213{
1214        int ret;
1215
1216        pr_info("registering driver %s\n", hv_driver->name);
1217
1218        ret = vmbus_exists();
1219        if (ret < 0)
1220                return ret;
1221
1222        hv_driver->driver.name = hv_driver->name;
1223        hv_driver->driver.owner = owner;
1224        hv_driver->driver.mod_name = mod_name;
1225        hv_driver->driver.bus = &hv_bus;
1226
1227        spin_lock_init(&hv_driver->dynids.lock);
1228        INIT_LIST_HEAD(&hv_driver->dynids.list);
1229
1230        ret = driver_register(&hv_driver->driver);
1231
1232        return ret;
1233}
1234EXPORT_SYMBOL_GPL(__vmbus_driver_register);
1235
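/*
 * A minimal sketch of a client, using a hypothetical "sample" driver;
 * callers normally go through the vmbus_driver_register() wrapper, which
 * supplies THIS_MODULE and KBUILD_MODNAME. The id_table is terminated by
 * a null GUID, as hv_vmbus_get_id() above expects:
 *
 *	static struct hv_driver sample_drv = {
 *		.name	  = "sample",
 *		.id_table = sample_id_table,
 *		.probe	  = sample_probe,
 *		.remove	  = sample_remove,
 *	};
 *
 *	static int __init sample_init(void)
 *	{
 *		return vmbus_driver_register(&sample_drv);
 *	}
 */
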
1236/**
 1237 * vmbus_driver_unregister() - Unregister a vmbus driver
 1238 * @hv_driver: Pointer to driver structure you want to
 1239 *             unregister
 1240 *
 1241 * Unregisters the given driver that was previously registered with a call to
 1242 * vmbus_driver_register().
1243 */
1244void vmbus_driver_unregister(struct hv_driver *hv_driver)
1245{
1246        pr_info("unregistering driver %s\n", hv_driver->name);
1247
1248        if (!vmbus_exists()) {
1249                driver_unregister(&hv_driver->driver);
1250                vmbus_free_dynids(hv_driver);
1251        }
1252}
1253EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
1254
1255
1256/*
1257 * Called when last reference to channel is gone.
1258 */
1259static void vmbus_chan_release(struct kobject *kobj)
1260{
1261        struct vmbus_channel *channel
1262                = container_of(kobj, struct vmbus_channel, kobj);
1263
1264        kfree_rcu(channel, rcu);
1265}
1266
1267struct vmbus_chan_attribute {
1268        struct attribute attr;
1269        ssize_t (*show)(const struct vmbus_channel *chan, char *buf);
1270        ssize_t (*store)(struct vmbus_channel *chan,
1271                         const char *buf, size_t count);
1272};
1273#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
1274        struct vmbus_chan_attribute chan_attr_##_name \
1275                = __ATTR(_name, _mode, _show, _store)
1276#define VMBUS_CHAN_ATTR_RW(_name) \
1277        struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
1278#define VMBUS_CHAN_ATTR_RO(_name) \
1279        struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
1280#define VMBUS_CHAN_ATTR_WO(_name) \
1281        struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)
1282
1283static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
1284                                    struct attribute *attr, char *buf)
1285{
1286        const struct vmbus_chan_attribute *attribute
1287                = container_of(attr, struct vmbus_chan_attribute, attr);
1288        const struct vmbus_channel *chan
1289                = container_of(kobj, struct vmbus_channel, kobj);
1290
1291        if (!attribute->show)
1292                return -EIO;
1293
1294        if (chan->state != CHANNEL_OPENED_STATE)
1295                return -EINVAL;
1296
1297        return attribute->show(chan, buf);
1298}
1299
1300static const struct sysfs_ops vmbus_chan_sysfs_ops = {
1301        .show = vmbus_chan_attr_show,
1302};
1303
1304static ssize_t out_mask_show(const struct vmbus_channel *channel, char *buf)
1305{
1306        const struct hv_ring_buffer_info *rbi = &channel->outbound;
1307
1308        return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
1309}
1310static VMBUS_CHAN_ATTR_RO(out_mask);
1311
1312static ssize_t in_mask_show(const struct vmbus_channel *channel, char *buf)
1313{
1314        const struct hv_ring_buffer_info *rbi = &channel->inbound;
1315
1316        return sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
1317}
1318static VMBUS_CHAN_ATTR_RO(in_mask);
1319
1320static ssize_t read_avail_show(const struct vmbus_channel *channel, char *buf)
1321{
1322        const struct hv_ring_buffer_info *rbi = &channel->inbound;
1323
1324        return sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
1325}
1326static VMBUS_CHAN_ATTR_RO(read_avail);
1327
1328static ssize_t write_avail_show(const struct vmbus_channel *channel, char *buf)
1329{
1330        const struct hv_ring_buffer_info *rbi = &channel->outbound;
1331
1332        return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
1333}
1334static VMBUS_CHAN_ATTR_RO(write_avail);
1335
1336static ssize_t show_target_cpu(const struct vmbus_channel *channel, char *buf)
1337{
1338        return sprintf(buf, "%u\n", channel->target_cpu);
1339}
1340static VMBUS_CHAN_ATTR(cpu, S_IRUGO, show_target_cpu, NULL);
1341
1342static ssize_t channel_pending_show(const struct vmbus_channel *channel,
1343                                    char *buf)
1344{
1345        return sprintf(buf, "%d\n",
1346                       channel_pending(channel,
1347                                       vmbus_connection.monitor_pages[1]));
1348}
1349static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL);
1350
1351static ssize_t channel_latency_show(const struct vmbus_channel *channel,
1352                                    char *buf)
1353{
1354        return sprintf(buf, "%d\n",
1355                       channel_latency(channel,
1356                                       vmbus_connection.monitor_pages[1]));
1357}
1358static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL);
1359
1360static ssize_t channel_interrupts_show(const struct vmbus_channel *channel, char *buf)
1361{
1362        return sprintf(buf, "%llu\n", channel->interrupts);
1363}
1364static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL);
1365
1366static ssize_t channel_events_show(const struct vmbus_channel *channel, char *buf)
1367{
1368        return sprintf(buf, "%llu\n", channel->sig_events);
1369}
1370static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);
1371
1372static ssize_t subchannel_monitor_id_show(const struct vmbus_channel *channel,
1373                                          char *buf)
1374{
1375        return sprintf(buf, "%u\n", channel->offermsg.monitorid);
1376}
1377static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL);
1378
1379static ssize_t subchannel_id_show(const struct vmbus_channel *channel,
1380                                  char *buf)
1381{
1382        return sprintf(buf, "%u\n",
1383                       channel->offermsg.offer.sub_channel_index);
1384}
1385static VMBUS_CHAN_ATTR_RO(subchannel_id);
1386
1387static struct attribute *vmbus_chan_attrs[] = {
1388        &chan_attr_out_mask.attr,
1389        &chan_attr_in_mask.attr,
1390        &chan_attr_read_avail.attr,
1391        &chan_attr_write_avail.attr,
1392        &chan_attr_cpu.attr,
1393        &chan_attr_pending.attr,
1394        &chan_attr_latency.attr,
1395        &chan_attr_interrupts.attr,
1396        &chan_attr_events.attr,
1397        &chan_attr_monitor_id.attr,
1398        &chan_attr_subchannel_id.attr,
1399        NULL
1400};
1401
1402static struct kobj_type vmbus_chan_ktype = {
1403        .sysfs_ops = &vmbus_chan_sysfs_ops,
1404        .release = vmbus_chan_release,
1405        .default_attrs = vmbus_chan_attrs,
1406};
1407
1408/*
1409 * vmbus_add_channel_kobj - setup a sub-directory under device/channels
1410 */
1411int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
1412{
1413        struct kobject *kobj = &channel->kobj;
1414        u32 relid = channel->offermsg.child_relid;
1415        int ret;
1416
1417        kobj->kset = dev->channels_kset;
1418        ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
1419                                   "%u", relid);
1420        if (ret)
1421                return ret;
1422
1423        kobject_uevent(kobj, KOBJ_ADD);
1424
1425        return 0;
1426}
1427
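/*
 * The result is one sub-directory per relid under the device, e.g.
 * /sys/bus/vmbus/devices/<device GUID>/channels/13/ (relid 13 chosen
 * purely as an illustration), populated with the vmbus_chan_attrs
 * defined above.
 */
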
1428/*
1429 * vmbus_device_create - Creates and registers a new child device
1430 * on the vmbus.
1431 */
1432struct hv_device *vmbus_device_create(const uuid_le *type,
1433                                      const uuid_le *instance,
1434                                      struct vmbus_channel *channel)
1435{
1436        struct hv_device *child_device_obj;
1437
1438        child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
1439        if (!child_device_obj) {
1440                pr_err("Unable to allocate device object for child device\n");
1441                return NULL;
1442        }
1443
1444        child_device_obj->channel = channel;
1445        memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
1446        memcpy(&child_device_obj->dev_instance, instance,
1447               sizeof(uuid_le));
1448        child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
1449
1450
1451        return child_device_obj;
1452}
1453
1454/*
1455 * vmbus_device_register - Register the child device
1456 */
1457int vmbus_device_register(struct hv_device *child_device_obj)
1458{
1459        struct kobject *kobj = &child_device_obj->device.kobj;
1460        int ret;
1461
1462        dev_set_name(&child_device_obj->device, "%pUl",
1463                     child_device_obj->channel->offermsg.offer.if_instance.b);
1464
1465        child_device_obj->device.bus = &hv_bus;
1466        child_device_obj->device.parent = &hv_acpi_dev->dev;
1467        child_device_obj->device.release = vmbus_device_release;
1468
1469        /*
1470         * Register with the LDM. This will kick off the driver/device
1471         * binding...which will eventually call vmbus_match() and vmbus_probe()
1472         */
1473        ret = device_register(&child_device_obj->device);
1474        if (ret) {
1475                pr_err("Unable to register child device\n");
1476                return ret;
1477        }
1478
1479        child_device_obj->channels_kset = kset_create_and_add("channels",
1480                                                              NULL, kobj);
1481        if (!child_device_obj->channels_kset) {
1482                ret = -ENOMEM;
1483                goto err_dev_unregister;
1484        }
1485
1486        ret = vmbus_add_channel_kobj(child_device_obj,
1487                                     child_device_obj->channel);
1488        if (ret) {
 1489                pr_err("Unable to register primary channel\n");
1490                goto err_kset_unregister;
1491        }
1492
1493        return 0;
1494
1495err_kset_unregister:
1496        kset_unregister(child_device_obj->channels_kset);
1497
1498err_dev_unregister:
1499        device_unregister(&child_device_obj->device);
1500        return ret;
1501}
1502
1503/*
1504 * vmbus_device_unregister - Remove the specified child device
1505 * from the vmbus.
1506 */
1507void vmbus_device_unregister(struct hv_device *device_obj)
1508{
1509        pr_debug("child device %s unregistered\n",
1510                dev_name(&device_obj->device));
1511
1512        kset_unregister(device_obj->channels_kset);
1513
1514        /*
1515         * Kick off the process of unregistering the device.
1516         * This will call vmbus_remove() and eventually vmbus_device_release()
1517         */
1518        device_unregister(&device_obj->device);
1519}
1520
1521
1522/*
 1523 * VMBUS is an ACPI-enumerated device. Get the information we
1524 * need from DSDT.
1525 */
1526#define VTPM_BASE_ADDRESS 0xfed40000
1527static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
1528{
1529        resource_size_t start = 0;
1530        resource_size_t end = 0;
1531        struct resource *new_res;
1532        struct resource **old_res = &hyperv_mmio;
1533        struct resource **prev_res = NULL;
1534
1535        switch (res->type) {
1536
1537        /*
1538         * "Address" descriptors are for bus windows. Ignore
1539         * "memory" descriptors, which are for registers on
1540         * devices.
1541         */
1542        case ACPI_RESOURCE_TYPE_ADDRESS32:
1543                start = res->data.address32.address.minimum;
1544                end = res->data.address32.address.maximum;
1545                break;
1546
1547        case ACPI_RESOURCE_TYPE_ADDRESS64:
1548                start = res->data.address64.address.minimum;
1549                end = res->data.address64.address.maximum;
1550                break;
1551
1552        default:
1553                /* Unused resource type */
1554                return AE_OK;
1555
1556        }
1557        /*
1558         * Ignore ranges that are below 1MB, as they're not
1559         * necessary or useful here.
1560         */
1561        if (end < 0x100000)
1562                return AE_OK;
1563
1564        new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
1565        if (!new_res)
1566                return AE_NO_MEMORY;
1567
1568        /* If this range overlaps the virtual TPM, truncate it. */
1569        if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
1570                end = VTPM_BASE_ADDRESS;
1571
1572        new_res->name = "hyperv mmio";
1573        new_res->flags = IORESOURCE_MEM;
1574        new_res->start = start;
1575        new_res->end = end;
1576
1577        /*
1578         * Merge adjacent ranges; keep the list sorted by start address.
1579         */
1580        do {
1581                if (!*old_res) {
1582                        *old_res = new_res;
1583                        break;
1584                }
1585
1586                if (((*old_res)->end + 1) == new_res->start) {
1587                        (*old_res)->end = new_res->end;
1588                        kfree(new_res);
1589                        break;
1590                }
1591
1592                if ((*old_res)->start == new_res->end + 1) {
1593                        (*old_res)->start = new_res->start;
1594                        kfree(new_res);
1595                        break;
1596                }
1597
1598                if ((*old_res)->start > new_res->end) {
1599                        new_res->sibling = *old_res;
1600                        if (prev_res)
1601                                (*prev_res)->sibling = new_res;
1602                        *old_res = new_res;
1603                        break;
1604                }
1605
1606                prev_res = old_res;
1607                old_res = &(*old_res)->sibling;
1608
1609        } while (1);
1610
1611        return AE_OK;
1612}
1613
1614static int vmbus_acpi_remove(struct acpi_device *device)
1615{
1616        struct resource *cur_res;
1617        struct resource *next_res;
1618
1619        if (hyperv_mmio) {
1620                if (fb_mmio) {
1621                        __release_region(hyperv_mmio, fb_mmio->start,
1622                                         resource_size(fb_mmio));
1623                        fb_mmio = NULL;
1624                }
1625
1626                for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
1627                        next_res = cur_res->sibling;
1628                        kfree(cur_res);
1629                }
1630        }
1631
1632        return 0;
1633}
1634
1635static void vmbus_reserve_fb(void)
1636{
1637        int size;
1638        /*
1639         * Make a claim for the frame buffer in the resource tree under the
1640         * first node, which will be the one below 4GB.  The length seems to
1641         * be underreported, particularly in a Generation 1 VM.  So start out
1642         * reserving a larger area and make it smaller until it succeeds.
1643         */
1644
1645        if (screen_info.lfb_base) {
1646                if (efi_enabled(EFI_BOOT))
1647                        size = max_t(__u32, screen_info.lfb_size, 0x800000);
1648                else
1649                        size = max_t(__u32, screen_info.lfb_size, 0x4000000);
1650
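                    /*
                     * Example: an EFI (Gen2) boot with an 8 MiB frame buffer
                     * tries 8 MiB, then 4 MiB, 2 MiB, and finally 1 MiB,
                     * keeping the first reservation that succeeds.
                     */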
1651                for (; !fb_mmio && (size >= 0x100000); size >>= 1) {
1652                        fb_mmio = __request_region(hyperv_mmio,
1653                                                   screen_info.lfb_base, size,
1654                                                   fb_mmio_name, 0);
1655                }
1656        }
1657}
1658
1659/**
1660 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
1661 * @new:                If successful, supplies a pointer to the
1662 *                      allocated MMIO space.
1663 * @device_obj:         Identifies the caller
1664 * @min:                Minimum guest physical address of the
1665 *                      allocation
1666 * @max:                Maximum guest physical address of the allocation
1667 * @size:               Size of the range to be allocated
1668 * @align:              Alignment of the range to be allocated
1669 * @fb_overlap_ok:      Whether this allocation can be allowed
1670 *                      to overlap the video frame buffer.
1671 *
1672 * This function walks the resources granted to VMBus by the
1673 * _CRS object in the ACPI namespace underneath the parent
1674 * "bridge" whether that's a root PCI bus in the Generation 1
1675 * case or a Module Device in the Generation 2 case.  It then
1676 * attempts to allocate from the global MMIO pool in a way that
1677 * matches the constraints supplied in these parameters and by
1678 * that _CRS.
1679 *
1680 * Return: 0 on success, -errno on failure
1681 */
1682int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
1683                        resource_size_t min, resource_size_t max,
1684                        resource_size_t size, resource_size_t align,
1685                        bool fb_overlap_ok)
1686{
1687        struct resource *iter, *shadow;
1688        resource_size_t range_min, range_max, start;
1689        const char *dev_n = dev_name(&device_obj->device);
1690        int retval;
1691
1692        retval = -ENXIO;
1693        down(&hyperv_mmio_lock);
1694
1695        /*
1696         * If overlaps with frame buffers are allowed, then first attempt to
1697         * make the allocation from within the reserved region.  Because it
1698         * is already reserved, no shadow allocation is necessary.
1699         */
1700        if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
1701            !(max < fb_mmio->start)) {
1702
1703                range_min = fb_mmio->start;
1704                range_max = fb_mmio->end;
1705                start = (range_min + align - 1) & ~(align - 1);
1706                for (; start + size - 1 <= range_max; start += align) {
1707                        *new = request_mem_region_exclusive(start, size, dev_n);
1708                        if (*new) {
1709                                retval = 0;
1710                                goto exit;
1711                        }
1712                }
1713        }
1714
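            /*
             * Walk the bus windows: for each aligned candidate offset,
             * take a "shadow" claim inside the hyperv_mmio tree first so
             * concurrent callers skip the range, then attempt the real,
             * exclusive claim in the global iomem tree. On success the
             * shadow's name stores a pointer to the new resource, tying
             * the two claims together.
             */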
1715        for (iter = hyperv_mmio; iter; iter = iter->sibling) {
1716                if ((iter->start >= max) || (iter->end <= min))
1717                        continue;
1718
1719                range_min = iter->start;
1720                range_max = iter->end;
1721                start = (range_min + align - 1) & ~(align - 1);
1722                for (; start + size - 1 <= range_max; start += align) {
1723                        shadow = __request_region(iter, start, size, NULL,
1724                                                  IORESOURCE_BUSY);
1725                        if (!shadow)
1726                                continue;
1727
1728                        *new = request_mem_region_exclusive(start, size, dev_n);
1729                        if (*new) {
1730                                shadow->name = (char *)*new;
1731                                retval = 0;
1732                                goto exit;
1733                        }
1734
1735                        __release_region(iter, start, size);
1736                }
1737        }
1738
1739exit:
1740        up(&hyperv_mmio_lock);
1741        return retval;
1742}
1743EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
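
    /*
     * Illustrative use only (variable names here are hypothetical): to
     * request a page-aligned 1 MiB range anywhere in guest physical
     * address space, a driver might do:
     *
     *     struct resource *res;
     *     int ret = vmbus_allocate_mmio(&res, hdev, 0, -1, 0x100000,
     *                                   PAGE_SIZE, false);
     *     if (ret)
     *             return ret;
     *     ...
     *     vmbus_free_mmio(res->start, resource_size(res));
     */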
1744
1745/**
1746 * vmbus_free_mmio() - Free a memory-mapped I/O range.
1747 * @start:              Base address of region to release.
1748 * @size:               Size of the range to be released
1749 *
1750 * This function releases anything requested by
1751 * vmbus_allocate_mmio().
1752 */
1753void vmbus_free_mmio(resource_size_t start, resource_size_t size)
1754{
1755        struct resource *iter;
1756
1757        down(&hyperv_mmio_lock);
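            /* Release any shadow claims first, then the real exclusive claim. */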
1758        for (iter = hyperv_mmio; iter; iter = iter->sibling) {
1759                if ((iter->start >= start + size) || (iter->end <= start))
1760                        continue;
1761
1762                __release_region(iter, start, size);
1763        }
1764        release_mem_region(start, size);
1765        up(&hyperv_mmio_lock);
1767}
1768EXPORT_SYMBOL_GPL(vmbus_free_mmio);
1769
1770static int vmbus_acpi_add(struct acpi_device *device)
1771{
1772        acpi_status result;
1773        int ret_val = -ENODEV;
1774        struct acpi_device *ancestor;
1775
1776        hv_acpi_dev = device;
1777
1778        result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
1779                                        vmbus_walk_resources, NULL);
1780
1781        if (ACPI_FAILURE(result))
1782                goto acpi_walk_err;
1783        /*
1784         * Some ancestor of the vmbus ACPI device (root PCI bus on Gen1
1785         * firmware, Module Device on Gen2) holds the MMIO ranges. Find it.
1786         */
1787        for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) {
1788                result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
1789                                             vmbus_walk_resources, NULL);
1790
1791                if (ACPI_FAILURE(result))
1792                        continue;
1793                if (hyperv_mmio) {
1794                        vmbus_reserve_fb();
1795                        break;
1796                }
1797        }
1798        ret_val = 0;
1799
1800acpi_walk_err:
1801        complete(&probe_event);
1802        if (ret_val)
1803                vmbus_acpi_remove(device);
1804        return ret_val;
1805}
1806
1807static const struct acpi_device_id vmbus_acpi_device_ids[] = {
1808        {"VMBUS", 0},
1809        {"VMBus", 0},
1810        {"", 0},
1811};
1812MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
1813
1814static struct acpi_driver vmbus_acpi_driver = {
1815        .name = "vmbus",
1816        .ids = vmbus_acpi_device_ids,
1817        .ops = {
1818                .add = vmbus_acpi_add,
1819                .remove = vmbus_acpi_remove,
1820        },
1821};
1822
1823static void hv_kexec_handler(void)
1824{
1825        hv_synic_clockevents_cleanup();
1826        vmbus_initiate_unload(false);
1827        vmbus_connection.conn_state = DISCONNECTED;
1828        /* Make sure conn_state is set as hv_synic_cleanup checks for it */
1829        mb();
1830        cpuhp_remove_state(hyperv_cpuhp_online);
1831        hyperv_cleanup();
1832}
1833
1834static void hv_crash_handler(struct pt_regs *regs)
1835{
1836        vmbus_initiate_unload(true);
1837        /*
1838         * In crash handler we can't schedule synic cleanup for all CPUs,
1839         * doing the cleanup for current CPU only. This should be sufficient
1840         * for kdump.
1841         */
1842        vmbus_connection.conn_state = DISCONNECTED;
1843        hv_synic_cleanup(smp_processor_id());
1844        hyperv_cleanup();
1845}
1846
1847static int __init hv_acpi_init(void)
1848{
1849        int ret, t;
1850
1851        if (!hv_is_hyperv_initialized())
1852                return -ENODEV;
1853
1854        init_completion(&probe_event);
1855
1856        /*
1857         * Get ACPI resources first.
1858         */
1859        ret = acpi_bus_register_driver(&vmbus_acpi_driver);
1860
1861        if (ret)
1862                return ret;
1863
1864        t = wait_for_completion_timeout(&probe_event, 5*HZ);
1865        if (t == 0) {
1866                ret = -ETIMEDOUT;
1867                goto cleanup;
1868        }
1869
1870        ret = vmbus_bus_init();
1871        if (ret)
1872                goto cleanup;
1873
1874        hv_setup_kexec_handler(hv_kexec_handler);
1875        hv_setup_crash_handler(hv_crash_handler);
1876
1877        return 0;
1878
1879cleanup:
1880        acpi_bus_unregister_driver(&vmbus_acpi_driver);
1881        hv_acpi_dev = NULL;
1882        return ret;
1883}
1884
1885static void __exit vmbus_exit(void)
1886{
1887        int cpu;
1888
1889        hv_remove_kexec_handler();
1890        hv_remove_crash_handler();
1891        vmbus_connection.conn_state = DISCONNECTED;
1892        hv_synic_clockevents_cleanup();
1893        vmbus_disconnect();
1894        hv_remove_vmbus_irq();
1895        for_each_online_cpu(cpu) {
1896                struct hv_per_cpu_context *hv_cpu
1897                        = per_cpu_ptr(hv_context.cpu_context, cpu);
1898
1899                tasklet_kill(&hv_cpu->msg_dpc);
1900        }
1901        vmbus_free_channels();
1902
1903        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
1904                kmsg_dump_unregister(&hv_kmsg_dumper);
1905                unregister_die_notifier(&hyperv_die_block);
1906                atomic_notifier_chain_unregister(&panic_notifier_list,
1907                                                 &hyperv_panic_block);
1908        }
1909
1910        free_page((unsigned long)hv_panic_page);
1911        unregister_sysctl_table(hv_ctl_table_hdr);
1912        hv_ctl_table_hdr = NULL;
1913        bus_unregister(&hv_bus);
1914
1915        cpuhp_remove_state(hyperv_cpuhp_online);
1916        hv_synic_free();
1917        acpi_bus_unregister_driver(&vmbus_acpi_driver);
1918}
1919
1921MODULE_LICENSE("GPL");
1922
1923subsys_initcall(hv_acpi_init);
1924module_exit(vmbus_exit);
1925