linux/arch/powerpc/kernel/vio.c
   1/*
   2 * IBM PowerPC Virtual I/O Infrastructure Support.
   3 *
   4 *    Copyright (c) 2003,2008 IBM Corp.
   5 *     Dave Engebretsen engebret@us.ibm.com
   6 *     Santiago Leon santil@us.ibm.com
   7 *     Hollis Blanchard <hollisb@us.ibm.com>
   8 *     Stephen Rothwell
   9 *     Robert Jennings <rcjenn@us.ibm.com>
  10 *
  11 *      This program is free software; you can redistribute it and/or
  12 *      modify it under the terms of the GNU General Public License
  13 *      as published by the Free Software Foundation; either version
  14 *      2 of the License, or (at your option) any later version.
  15 */
  16
  17#include <linux/types.h>
  18#include <linux/stat.h>
  19#include <linux/device.h>
  20#include <linux/init.h>
  21#include <linux/slab.h>
  22#include <linux/console.h>
  23#include <linux/export.h>
  24#include <linux/mm.h>
  25#include <linux/dma-mapping.h>
  26#include <linux/kobject.h>
  27
  28#include <asm/iommu.h>
  29#include <asm/dma.h>
  30#include <asm/vio.h>
  31#include <asm/prom.h>
  32#include <asm/firmware.h>
  33#include <asm/tce.h>
  34#include <asm/abs_addr.h>
  35#include <asm/page.h>
  36#include <asm/hvcall.h>
  37
  38static struct bus_type vio_bus_type;
  39
  40static struct vio_dev vio_bus_device  = { /* fake "parent" device */
  41        .name = "vio",
  42        .type = "",
  43        .dev.init_name = "vio",
  44        .dev.bus = &vio_bus_type,
  45};
  46
  47#ifdef CONFIG_PPC_SMLPAR
  48/**
  49 * vio_cmo_pool - A pool of IO memory for CMO use
  50 *
  51 * @size: The size of the pool in bytes
  52 * @free: The amount of free memory in the pool
  53 */
  54struct vio_cmo_pool {
  55        size_t size;
  56        size_t free;
  57};
  58
  59/* How many ms to delay queued balance work */
  60#define VIO_CMO_BALANCE_DELAY 100
  61
  62/* Portion out IO memory to CMO devices by this chunk size */
  63#define VIO_CMO_BALANCE_CHUNK 131072
  64
  65/**
  66 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
  67 *
   68 * @viodev: struct vio_dev pointer
   69 * @list: list entry linking this device into the bus CMO device list
  70 */
  71struct vio_cmo_dev_entry {
  72        struct vio_dev *viodev;
  73        struct list_head list;
  74};
  75
  76/**
  77 * vio_cmo - VIO bus accounting structure for CMO entitlement
  78 *
  79 * @lock: spinlock for entire structure
  80 * @balance_q: work queue for balancing system entitlement
  81 * @device_list: list of CMO-enabled devices requiring entitlement
  82 * @entitled: total system entitlement in bytes
  83 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
  84 * @excess: pool of excess entitlement not needed for device reserves or spare
  85 * @spare: IO memory for device hotplug functionality
  86 * @min: minimum necessary for system operation
  87 * @desired: desired memory for system operation
  88 * @curr: bytes currently allocated
  89 * @high: high water mark for IO data usage
  90 */
  91struct vio_cmo {
  92        spinlock_t lock;
  93        struct delayed_work balance_q;
  94        struct list_head device_list;
  95        size_t entitled;
  96        struct vio_cmo_pool reserve;
  97        struct vio_cmo_pool excess;
  98        size_t spare;
  99        size_t min;
 100        size_t desired;
 101        size_t curr;
 102        size_t high;
 103} vio_cmo;
 104
 105/**
  106 * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
 107 */
 108static int vio_cmo_num_OF_devs(void)
 109{
 110        struct device_node *node_vroot;
 111        int count = 0;
 112
 113        /*
 114         * Count the number of vdevice entries with an
 115         * ibm,my-dma-window OF property
 116         */
 117        node_vroot = of_find_node_by_name(NULL, "vdevice");
 118        if (node_vroot) {
 119                struct device_node *of_node;
 120                struct property *prop;
 121
 122                for_each_child_of_node(node_vroot, of_node) {
 123                        prop = of_find_property(of_node, "ibm,my-dma-window",
 124                                               NULL);
 125                        if (prop)
 126                                count++;
 127                }
 128        }
 129        of_node_put(node_vroot);
 130        return count;
 131}
 132
 133/**
  134 * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
 135 *
 136 * @viodev: VIO device requesting IO memory
 137 * @size: size of allocation requested
 138 *
 139 * Allocations come from memory reserved for the devices and any excess
 140 * IO memory available to all devices.  The spare pool used to service
 141 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
 142 * made available.
 143 *
 144 * Return codes:
 145 *  0 for successful allocation and -ENOMEM for a failure
 146 */
 147static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
 148{
 149        unsigned long flags;
 150        size_t reserve_free = 0;
 151        size_t excess_free = 0;
 152        int ret = -ENOMEM;
 153
 154        spin_lock_irqsave(&vio_cmo.lock, flags);
 155
 156        /* Determine the amount of free entitlement available in reserve */
 157        if (viodev->cmo.entitled > viodev->cmo.allocated)
 158                reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
 159
  160        /* If the spare is not fulfilled, the excess pool cannot be used. */
 161        if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
 162                excess_free = vio_cmo.excess.free;
 163
 164        /* The request can be satisfied */
 165        if ((reserve_free + excess_free) >= size) {
 166                vio_cmo.curr += size;
 167                if (vio_cmo.curr > vio_cmo.high)
 168                        vio_cmo.high = vio_cmo.curr;
 169                viodev->cmo.allocated += size;
 170                size -= min(reserve_free, size);
 171                vio_cmo.excess.free -= size;
 172                ret = 0;
 173        }
 174
 175        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 176        return ret;
 177}
 178
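/*
 * Worked example for vio_cmo_alloc() above (illustrative numbers, not
 * from any real system): a device with cmo.entitled = 65536 and
 * cmo.allocated = 49152 has reserve_free = 16384.  A request of
 * size = 24576, with vio_cmo.excess.free = 1048576 and the spare
 * fulfilled, is satisfied by charging the first 16384 bytes against the
 * device's reserve and the remaining 8192 bytes against the excess pool.
 */
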
 179/**
  180 * vio_cmo_dealloc - deallocate IO memory from CMO-enabled devices
 181 * @viodev: VIO device freeing IO memory
 182 * @size: size of deallocation
 183 *
 184 * IO memory is freed by the device back to the correct memory pools.
  185 * The spare pool is replenished first from either memory pool; the
  186 * reserve pool is then used to reduce device entitlement, the excess
  187 * pool is used to grow the reserve pool toward the desired entitlement
  188 * target, and any remaining excess is returned to the excess pool.
 189 *
 190 */
 191static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
 192{
 193        unsigned long flags;
 194        size_t spare_needed = 0;
 195        size_t excess_freed = 0;
 196        size_t reserve_freed = size;
 197        size_t tmp;
 198        int balance = 0;
 199
 200        spin_lock_irqsave(&vio_cmo.lock, flags);
 201        vio_cmo.curr -= size;
 202
 203        /* Amount of memory freed from the excess pool */
 204        if (viodev->cmo.allocated > viodev->cmo.entitled) {
 205                excess_freed = min(reserve_freed, (viodev->cmo.allocated -
 206                                                   viodev->cmo.entitled));
 207                reserve_freed -= excess_freed;
 208        }
 209
 210        /* Remove allocation from device */
 211        viodev->cmo.allocated -= (reserve_freed + excess_freed);
 212
 213        /* Spare is a subset of the reserve pool, replenish it first. */
 214        spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
 215
 216        /*
 217         * Replenish the spare in the reserve pool from the excess pool.
 218         * This moves entitlement into the reserve pool.
 219         */
 220        if (spare_needed && excess_freed) {
 221                tmp = min(excess_freed, spare_needed);
 222                vio_cmo.excess.size -= tmp;
 223                vio_cmo.reserve.size += tmp;
 224                vio_cmo.spare += tmp;
 225                excess_freed -= tmp;
 226                spare_needed -= tmp;
 227                balance = 1;
 228        }
 229
 230        /*
 231         * Replenish the spare in the reserve pool from the reserve pool.
 232         * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
 233         * if needed, and gives it to the spare pool. The amount of used
 234         * memory in this pool does not change.
 235         */
 236        if (spare_needed && reserve_freed) {
 237                tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
 238
 239                vio_cmo.spare += tmp;
 240                viodev->cmo.entitled -= tmp;
 241                reserve_freed -= tmp;
 242                spare_needed -= tmp;
 243                balance = 1;
 244        }
 245
 246        /*
 247         * Increase the reserve pool until the desired allocation is met.
 248         * Move an allocation freed from the excess pool into the reserve
 249         * pool and schedule a balance operation.
 250         */
 251        if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
 252                tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
 253
 254                vio_cmo.excess.size -= tmp;
 255                vio_cmo.reserve.size += tmp;
 256                excess_freed -= tmp;
 257                balance = 1;
 258        }
 259
  260        /* Return any remaining freed memory to the excess pool */
 261        if (excess_freed)
 262                vio_cmo.excess.free += excess_freed;
 263
 264        if (balance)
 265                schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
 266        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 267}
 268
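/*
 * Worked example for vio_cmo_dealloc() above (illustrative numbers):
 * freeing size = 32768 from a device with cmo.allocated = 98304 and
 * cmo.entitled = 81920 splits the free into excess_freed = 16384 and
 * reserve_freed = 16384.  If vio_cmo.spare is 4096 short of
 * VIO_CMO_MIN_ENT, 4096 of the excess portion is moved into the reserve
 * pool as spare first; the excess left over then grows the reserve pool
 * toward vio_cmo.desired or is returned to the excess pool, and a
 * balance pass is scheduled.
 */
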
 269/**
 270 * vio_cmo_entitlement_update - Manage system entitlement changes
 271 *
 272 * @new_entitlement: new system entitlement to attempt to accommodate
 273 *
 274 * Increases in entitlement will be used to fulfill the spare entitlement
 275 * and the rest is given to the excess pool.  Decreases, if they are
  276 * possible, come from the excess pool and from unused device entitlement.
  277 *
  278 * Returns: 0 on success, -ENOMEM when the change cannot be made
 279 */
 280int vio_cmo_entitlement_update(size_t new_entitlement)
 281{
 282        struct vio_dev *viodev;
 283        struct vio_cmo_dev_entry *dev_ent;
 284        unsigned long flags;
 285        size_t avail, delta, tmp;
 286
 287        spin_lock_irqsave(&vio_cmo.lock, flags);
 288
 289        /* Entitlement increases */
 290        if (new_entitlement > vio_cmo.entitled) {
 291                delta = new_entitlement - vio_cmo.entitled;
 292
 293                /* Fulfill spare allocation */
 294                if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
 295                        tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
 296                        vio_cmo.spare += tmp;
 297                        vio_cmo.reserve.size += tmp;
 298                        delta -= tmp;
 299                }
 300
 301                /* Remaining new allocation goes to the excess pool */
 302                vio_cmo.entitled += delta;
 303                vio_cmo.excess.size += delta;
 304                vio_cmo.excess.free += delta;
 305
 306                goto out;
 307        }
 308
 309        /* Entitlement decreases */
 310        delta = vio_cmo.entitled - new_entitlement;
 311        avail = vio_cmo.excess.free;
 312
 313        /*
 314         * Need to check how much unused entitlement each device can
 315         * sacrifice to fulfill entitlement change.
 316         */
 317        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 318                if (avail >= delta)
 319                        break;
 320
 321                viodev = dev_ent->viodev;
 322                if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
 323                    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
  324                        avail += viodev->cmo.entitled -
  325                                 max_t(size_t, viodev->cmo.allocated,
  326                                       VIO_CMO_MIN_ENT);
 327        }
 328
 329        if (delta <= avail) {
 330                vio_cmo.entitled -= delta;
 331
 332                /* Take entitlement from the excess pool first */
 333                tmp = min(vio_cmo.excess.free, delta);
 334                vio_cmo.excess.size -= tmp;
 335                vio_cmo.excess.free -= tmp;
 336                delta -= tmp;
 337
 338                /*
 339                 * Remove all but VIO_CMO_MIN_ENT bytes from devices
 340                 * until entitlement change is served
 341                 */
 342                list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 343                        if (!delta)
 344                                break;
 345
 346                        viodev = dev_ent->viodev;
 347                        tmp = 0;
 348                        if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
 349                            (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
 350                                tmp = viodev->cmo.entitled -
 351                                      max_t(size_t, viodev->cmo.allocated,
 352                                            VIO_CMO_MIN_ENT);
 353                        viodev->cmo.entitled -= min(tmp, delta);
 354                        delta -= min(tmp, delta);
 355                }
 356        } else {
 357                spin_unlock_irqrestore(&vio_cmo.lock, flags);
 358                return -ENOMEM;
 359        }
 360
 361out:
 362        schedule_delayed_work(&vio_cmo.balance_q, 0);
 363        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 364        return 0;
 365}
 366
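/*
 * Example of an entitlement increase in vio_cmo_entitlement_update()
 * above (illustrative numbers): if vio_cmo.spare is 8192 bytes short of
 * VIO_CMO_MIN_ENT and the system gains delta = 1048576 bytes, the first
 * 8192 bytes top up the spare (and the reserve pool size) and the
 * remaining 1040384 bytes are added to both excess.size and excess.free
 * before a balance is queued.
 */
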
 367/**
 368 * vio_cmo_balance - Balance entitlement among devices
 369 *
 370 * @work: work queue structure for this operation
 371 *
 372 * Any system entitlement above the minimum needed for devices, or
 373 * already allocated to devices, can be distributed to the devices.
 374 * The list of devices is iterated through to recalculate the desired
 375 * entitlement level and to determine how much entitlement above the
 376 * minimum entitlement is allocated to devices.
 377 *
 378 * Small chunks of the available entitlement are given to devices until
 379 * their requirements are fulfilled or there is no entitlement left to give.
 380 * Upon completion sizes of the reserve and excess pools are calculated.
 381 *
 382 * The system minimum entitlement level is also recalculated here.
 383 * Entitlement will be reserved for devices even after vio_bus_remove to
 384 * accommodate reloading the driver.  The OF tree is walked to count the
 385 * number of devices present and this will remove entitlement for devices
 386 * that have actually left the system after having vio_bus_remove called.
 387 */
 388static void vio_cmo_balance(struct work_struct *work)
 389{
 390        struct vio_cmo *cmo;
 391        struct vio_dev *viodev;
 392        struct vio_cmo_dev_entry *dev_ent;
 393        unsigned long flags;
 394        size_t avail = 0, level, chunk, need;
 395        int devcount = 0, fulfilled;
 396
 397        cmo = container_of(work, struct vio_cmo, balance_q.work);
 398
 399        spin_lock_irqsave(&vio_cmo.lock, flags);
 400
 401        /* Calculate minimum entitlement and fulfill spare */
 402        cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
 403        BUG_ON(cmo->min > cmo->entitled);
 404        cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
 405        cmo->min += cmo->spare;
 406        cmo->desired = cmo->min;
 407
 408        /*
 409         * Determine how much entitlement is available and reset device
 410         * entitlements
 411         */
 412        avail = cmo->entitled - cmo->spare;
 413        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 414                viodev = dev_ent->viodev;
 415                devcount++;
 416                viodev->cmo.entitled = VIO_CMO_MIN_ENT;
 417                cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
 418                avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
 419        }
 420
 421        /*
 422         * Having provided each device with the minimum entitlement, loop
 423         * over the devices portioning out the remaining entitlement
 424         * until there is nothing left.
 425         */
 426        level = VIO_CMO_MIN_ENT;
 427        while (avail) {
 428                fulfilled = 0;
 429                list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 430                        viodev = dev_ent->viodev;
 431
 432                        if (viodev->cmo.desired <= level) {
 433                                fulfilled++;
 434                                continue;
 435                        }
 436
 437                        /*
 438                         * Give the device up to VIO_CMO_BALANCE_CHUNK
 439                         * bytes of entitlement, but do not exceed the
 440                         * desired level of entitlement for the device.
 441                         */
 442                        chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
 443                        chunk = min(chunk, (viodev->cmo.desired -
 444                                            viodev->cmo.entitled));
 445                        viodev->cmo.entitled += chunk;
 446
 447                        /*
 448                         * If the memory for this entitlement increase was
 449                         * already allocated to the device it does not come
 450                         * from the available pool being portioned out.
 451                         */
  452                        need = max(viodev->cmo.allocated, viodev->cmo.entitled) -
 453                               max(viodev->cmo.allocated, level);
 454                        avail -= need;
 455
 456                }
 457                if (fulfilled == devcount)
 458                        break;
 459                level += VIO_CMO_BALANCE_CHUNK;
 460        }
 461
 462        /* Calculate new reserve and excess pool sizes */
 463        cmo->reserve.size = cmo->min;
 464        cmo->excess.free = 0;
 465        cmo->excess.size = 0;
 466        need = 0;
 467        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 468                viodev = dev_ent->viodev;
 469                /* Calculated reserve size above the minimum entitlement */
 470                if (viodev->cmo.entitled)
 471                        cmo->reserve.size += (viodev->cmo.entitled -
 472                                              VIO_CMO_MIN_ENT);
 473                /* Calculated used excess entitlement */
 474                if (viodev->cmo.allocated > viodev->cmo.entitled)
 475                        need += viodev->cmo.allocated - viodev->cmo.entitled;
 476        }
 477        cmo->excess.size = cmo->entitled - cmo->reserve.size;
 478        cmo->excess.free = cmo->excess.size - need;
 479
 480        cancel_delayed_work(to_delayed_work(work));
 481        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 482}
 483
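/*
 * Distribution sketch for vio_cmo_balance() above (illustrative): with
 * VIO_CMO_BALANCE_CHUNK of 128KB, a device whose cmo.desired sits 512KB
 * above VIO_CMO_MIN_ENT is topped up over at most four passes of the
 * loop, each pass raising "level" by one chunk, until its desired level
 * is reached or "avail" runs out.
 */
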
 484static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
 485                                          dma_addr_t *dma_handle, gfp_t flag,
 486                                          struct dma_attrs *attrs)
 487{
 488        struct vio_dev *viodev = to_vio_dev(dev);
 489        void *ret;
 490
 491        if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
 492                atomic_inc(&viodev->cmo.allocs_failed);
 493                return NULL;
 494        }
 495
 496        ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs);
 497        if (unlikely(ret == NULL)) {
 498                vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
 499                atomic_inc(&viodev->cmo.allocs_failed);
 500        }
 501
 502        return ret;
 503}
 504
 505static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
 506                                        void *vaddr, dma_addr_t dma_handle,
 507                                        struct dma_attrs *attrs)
 508{
 509        struct vio_dev *viodev = to_vio_dev(dev);
 510
 511        dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs);
 512
 513        vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
 514}
 515
 516static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
 517                                         unsigned long offset, size_t size,
 518                                         enum dma_data_direction direction,
 519                                         struct dma_attrs *attrs)
 520{
 521        struct vio_dev *viodev = to_vio_dev(dev);
 522        dma_addr_t ret = DMA_ERROR_CODE;
 523
 524        if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
 525                atomic_inc(&viodev->cmo.allocs_failed);
 526                return ret;
 527        }
 528
 529        ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
 530        if (unlikely(dma_mapping_error(dev, ret))) {
 531                vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
 532                atomic_inc(&viodev->cmo.allocs_failed);
 533        }
 534
 535        return ret;
 536}
 537
 538static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 539                                     size_t size,
 540                                     enum dma_data_direction direction,
 541                                     struct dma_attrs *attrs)
 542{
 543        struct vio_dev *viodev = to_vio_dev(dev);
 544
 545        dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
 546
 547        vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
 548}
 549
 550static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 551                                int nelems, enum dma_data_direction direction,
 552                                struct dma_attrs *attrs)
 553{
 554        struct vio_dev *viodev = to_vio_dev(dev);
 555        struct scatterlist *sgl;
 556        int ret, count = 0;
 557        size_t alloc_size = 0;
 558
 559        for (sgl = sglist; count < nelems; count++, sgl++)
 560                alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
 561
 562        if (vio_cmo_alloc(viodev, alloc_size)) {
 563                atomic_inc(&viodev->cmo.allocs_failed);
 564                return 0;
 565        }
 566
 567        ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
 568
 569        if (unlikely(!ret)) {
 570                vio_cmo_dealloc(viodev, alloc_size);
 571                atomic_inc(&viodev->cmo.allocs_failed);
 572                return ret;
 573        }
 574
 575        for (sgl = sglist, count = 0; count < ret; count++, sgl++)
 576                alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
 577        if (alloc_size)
 578                vio_cmo_dealloc(viodev, alloc_size);
 579
 580        return ret;
 581}
 582
 583static void vio_dma_iommu_unmap_sg(struct device *dev,
 584                struct scatterlist *sglist, int nelems,
 585                enum dma_data_direction direction,
 586                struct dma_attrs *attrs)
 587{
 588        struct vio_dev *viodev = to_vio_dev(dev);
 589        struct scatterlist *sgl;
 590        size_t alloc_size = 0;
 591        int count = 0;
 592
 593        for (sgl = sglist; count < nelems; count++, sgl++)
 594                alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
 595
 596        dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
 597
 598        vio_cmo_dealloc(viodev, alloc_size);
 599}
 600
 601static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask)
 602{
 603        return dma_iommu_ops.dma_supported(dev, mask);
 604}
 605
 606static u64 vio_dma_get_required_mask(struct device *dev)
 607{
 608        return dma_iommu_ops.get_required_mask(dev);
 609}
 610
 611struct dma_map_ops vio_dma_mapping_ops = {
 612        .alloc             = vio_dma_iommu_alloc_coherent,
 613        .free              = vio_dma_iommu_free_coherent,
 614        .map_sg            = vio_dma_iommu_map_sg,
 615        .unmap_sg          = vio_dma_iommu_unmap_sg,
 616        .map_page          = vio_dma_iommu_map_page,
 617        .unmap_page        = vio_dma_iommu_unmap_page,
 618        .dma_supported     = vio_dma_iommu_dma_supported,
 619        .get_required_mask = vio_dma_get_required_mask,
 620};
 621
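/*
 * These ops wrap dma_iommu_ops so that every mapping is charged against
 * the owning device's CMO entitlement before the underlying IOMMU
 * operation runs.  A minimal sketch of the get_desired_dma() callback a
 * CMO-aware driver supplies (hypothetical name and sizes, not part of
 * this file):
 *
 *	static unsigned long example_get_desired_dma(struct vio_dev *vdev)
 *	{
 *		// e.g. entitlement for a 64-entry ring of 4K buffers
 *		return IOMMU_PAGE_ALIGN(64 * 4096);
 *	}
 */
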
 622/**
 623 * vio_cmo_set_dev_desired - Set desired entitlement for a device
 624 *
 625 * @viodev: struct vio_dev for device to alter
  626 * @desired: new desired entitlement level in bytes
 627 *
 628 * For use by devices to request a change to their entitlement at runtime or
 629 * through sysfs.  The desired entitlement level is changed and a balancing
 630 * of system resources is scheduled to run in the future.
 631 */
 632void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
 633{
 634        unsigned long flags;
 635        struct vio_cmo_dev_entry *dev_ent;
 636        int found = 0;
 637
 638        if (!firmware_has_feature(FW_FEATURE_CMO))
 639                return;
 640
 641        spin_lock_irqsave(&vio_cmo.lock, flags);
 642        if (desired < VIO_CMO_MIN_ENT)
 643                desired = VIO_CMO_MIN_ENT;
 644
 645        /*
  646         * Changes will not be made for devices not in the device list;
  647         * if a device is not in the list, no driver is loaded for it
  648         * and it cannot receive entitlement.
 649         */
 650        list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
 651                if (viodev == dev_ent->viodev) {
 652                        found = 1;
 653                        break;
 654                }
 655        if (!found) {
 656                spin_unlock_irqrestore(&vio_cmo.lock, flags);
 657                return;
 658        }
 659
 660        /* Increase/decrease in desired device entitlement */
 661        if (desired >= viodev->cmo.desired) {
  662                /* Just bump the bus and device values prior to a balance */
 663                vio_cmo.desired += desired - viodev->cmo.desired;
 664                viodev->cmo.desired = desired;
 665        } else {
 666                /* Decrease bus and device values for desired entitlement */
 667                vio_cmo.desired -= viodev->cmo.desired - desired;
 668                viodev->cmo.desired = desired;
 669                /*
 670                 * If less entitlement is desired than current entitlement, move
 671                 * any reserve memory in the change region to the excess pool.
 672                 */
 673                if (viodev->cmo.entitled > desired) {
 674                        vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
 675                        vio_cmo.excess.size += viodev->cmo.entitled - desired;
 676                        /*
 677                         * If entitlement moving from the reserve pool to the
 678                         * excess pool is currently unused, add to the excess
 679                         * free counter.
 680                         */
 681                        if (viodev->cmo.allocated < viodev->cmo.entitled)
 682                                vio_cmo.excess.free += viodev->cmo.entitled -
 683                                                       max(viodev->cmo.allocated, desired);
 684                        viodev->cmo.entitled = desired;
 685                }
 686        }
 687        schedule_delayed_work(&vio_cmo.balance_q, 0);
 688        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 689}
 690
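/*
 * Besides being callable from drivers, vio_cmo_set_dev_desired() above
 * is reachable from userspace through the per-device cmo_desired sysfs
 * attribute defined further down in this file, e.g. (hypothetical unit
 * address):
 *
 *	echo 1048576 > /sys/bus/vio/devices/30000002/cmo_desired
 */
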
 691/**
 692 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
 693 *
  694 * @viodev: Pointer to struct vio_dev for device
  695 *
  696 * Determine the device's IO memory entitlement needs, attempting
 697 * to satisfy the system minimum entitlement at first and scheduling
 698 * a balance operation to take care of the rest at a later time.
 699 *
 700 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
 701 *          -ENOMEM when entitlement is not available for device or
 702 *          device entry.
 703 *
 704 */
 705static int vio_cmo_bus_probe(struct vio_dev *viodev)
 706{
 707        struct vio_cmo_dev_entry *dev_ent;
 708        struct device *dev = &viodev->dev;
 709        struct vio_driver *viodrv = to_vio_driver(dev->driver);
 710        unsigned long flags;
 711        size_t size;
 712
 713        /*
 714         * Check to see that device has a DMA window and configure
 715         * entitlement for the device.
 716         */
 717        if (of_get_property(viodev->dev.of_node,
 718                            "ibm,my-dma-window", NULL)) {
 719                /* Check that the driver is CMO enabled and get desired DMA */
 720                if (!viodrv->get_desired_dma) {
 721                        dev_err(dev, "%s: device driver does not support CMO\n",
 722                                __func__);
 723                        return -EINVAL;
 724                }
 725
 726                viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
 727                if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
 728                        viodev->cmo.desired = VIO_CMO_MIN_ENT;
 729                size = VIO_CMO_MIN_ENT;
 730
 731                dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
 732                                  GFP_KERNEL);
 733                if (!dev_ent)
 734                        return -ENOMEM;
 735
 736                dev_ent->viodev = viodev;
 737                spin_lock_irqsave(&vio_cmo.lock, flags);
 738                list_add(&dev_ent->list, &vio_cmo.device_list);
 739        } else {
 740                viodev->cmo.desired = 0;
 741                size = 0;
 742                spin_lock_irqsave(&vio_cmo.lock, flags);
 743        }
 744
 745        /*
 746         * If the needs for vio_cmo.min have not changed since they
 747         * were last set, the number of devices in the OF tree has
 748         * been constant and the IO memory for this is already in
 749         * the reserve pool.
 750         */
 751        if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
 752                            VIO_CMO_MIN_ENT)) {
  753                /* Update desired entitlement if the device requires it */
 754                if (size)
 755                        vio_cmo.desired += (viodev->cmo.desired -
 756                                        VIO_CMO_MIN_ENT);
 757        } else {
 758                size_t tmp;
 759
 760                tmp = vio_cmo.spare + vio_cmo.excess.free;
 761                if (tmp < size) {
 762                        dev_err(dev, "%s: insufficient free "
 763                                "entitlement to add device. "
 764                                "Need %lu, have %lu\n", __func__,
  765                                size, tmp);
 766                        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 767                        return -ENOMEM;
 768                }
 769
 770                /* Use excess pool first to fulfill request */
 771                tmp = min(size, vio_cmo.excess.free);
 772                vio_cmo.excess.free -= tmp;
 773                vio_cmo.excess.size -= tmp;
 774                vio_cmo.reserve.size += tmp;
 775
 776                /* Use spare if excess pool was insufficient */
 777                vio_cmo.spare -= size - tmp;
 778
 779                /* Update bus accounting */
 780                vio_cmo.min += size;
 781                vio_cmo.desired += viodev->cmo.desired;
 782        }
 783        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 784        return 0;
 785}
 786
 787/**
 788 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
 789 *
  790 * @viodev: Pointer to struct vio_dev for device
  791 *
  792 * Remove the device from the cmo device list.  The minimum entitlement
  793 * will be reserved for the device as long as it is in the system.  The
  794 * rest of the entitlement that the device had been allocated will be
  795 * returned to the system.
 796 */
 797static void vio_cmo_bus_remove(struct vio_dev *viodev)
 798{
 799        struct vio_cmo_dev_entry *dev_ent;
 800        unsigned long flags;
 801        size_t tmp;
 802
 803        spin_lock_irqsave(&vio_cmo.lock, flags);
 804        if (viodev->cmo.allocated) {
 805                dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
 806                        "allocated after remove operation.\n",
 807                        __func__, viodev->cmo.allocated);
 808                BUG();
 809        }
 810
 811        /*
 812         * Remove the device from the device list being maintained for
 813         * CMO enabled devices.
 814         */
 815        list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
 816                if (viodev == dev_ent->viodev) {
 817                        list_del(&dev_ent->list);
 818                        kfree(dev_ent);
 819                        break;
 820                }
 821
 822        /*
 823         * Devices may not require any entitlement and they do not need
 824         * to be processed.  Otherwise, return the device's entitlement
 825         * back to the pools.
 826         */
 827        if (viodev->cmo.entitled) {
 828                /*
  829                 * This device has not yet left the OF tree; its
  830                 * minimum entitlement remains in vio_cmo.min and
  831                 * vio_cmo.desired
 832                 */
 833                vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
 834
 835                /*
  836                 * Keep the device's minimum allocation in reserve for
  837                 * as long as it exists in the OF tree, as determined
  838                 * by a later balance operation
 839                 */
 840                viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
 841
 842                /* Replenish spare from freed reserve pool */
 843                if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
 844                        tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
 845                                                         vio_cmo.spare));
 846                        vio_cmo.spare += tmp;
 847                        viodev->cmo.entitled -= tmp;
 848                }
 849
 850                /* Remaining reserve goes to excess pool */
 851                vio_cmo.excess.size += viodev->cmo.entitled;
 852                vio_cmo.excess.free += viodev->cmo.entitled;
 853                vio_cmo.reserve.size -= viodev->cmo.entitled;
 854
 855                /*
 856                 * Until the device is removed it will keep a
  857                 * minimum entitlement; this guarantees that a
  858                 * module unload/reload will succeed.
 859                 */
 860                viodev->cmo.entitled = VIO_CMO_MIN_ENT;
 861                viodev->cmo.desired = VIO_CMO_MIN_ENT;
 862                atomic_set(&viodev->cmo.allocs_failed, 0);
 863        }
 864
 865        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 866}
 867
 868static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
 869{
 870        set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
 871}
 872
 873/**
 874 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
 875 *
 876 * Set up the reserve and excess entitlement pools based on available
 877 * system entitlement and the number of devices in the OF tree that
 878 * require entitlement in the reserve pool.
 879 */
 880static void vio_cmo_bus_init(void)
 881{
 882        struct hvcall_mpp_data mpp_data;
 883        int err;
 884
 885        memset(&vio_cmo, 0, sizeof(struct vio_cmo));
 886        spin_lock_init(&vio_cmo.lock);
 887        INIT_LIST_HEAD(&vio_cmo.device_list);
 888        INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
 889
 890        /* Get current system entitlement */
 891        err = h_get_mpp(&mpp_data);
 892
 893        /*
  894         * On failure, continue with entitlement set to 0; we will panic()
  895         * later when the spare is reserved.
 896         */
 897        if (err != H_SUCCESS) {
  898                printk(KERN_ERR "%s: unable to determine system IO "
 899                       "entitlement. (%d)\n", __func__, err);
 900                vio_cmo.entitled = 0;
 901        } else {
 902                vio_cmo.entitled = mpp_data.entitled_mem;
 903        }
 904
 905        /* Set reservation and check against entitlement */
 906        vio_cmo.spare = VIO_CMO_MIN_ENT;
 907        vio_cmo.reserve.size = vio_cmo.spare;
 908        vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
 909                                 VIO_CMO_MIN_ENT);
 910        if (vio_cmo.reserve.size > vio_cmo.entitled) {
 911                printk(KERN_ERR "%s: insufficient system entitlement\n",
 912                       __func__);
 913                panic("%s: Insufficient system entitlement", __func__);
 914        }
 915
 916        /* Set the remaining accounting variables */
 917        vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
 918        vio_cmo.excess.free = vio_cmo.excess.size;
 919        vio_cmo.min = vio_cmo.reserve.size;
 920        vio_cmo.desired = vio_cmo.reserve.size;
 921}
 922
 923/* sysfs device functions and data structures for CMO */
 924
 925#define viodev_cmo_rd_attr(name)                                        \
 926static ssize_t viodev_cmo_##name##_show(struct device *dev,             \
 927                                        struct device_attribute *attr,  \
 928                                         char *buf)                     \
 929{                                                                       \
 930        return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
 931}
 932
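/*
 * For reference, viodev_cmo_rd_attr(desired) expands to a show routine
 * equivalent to:
 *
 *	static ssize_t viodev_cmo_desired_show(struct device *dev,
 *			struct device_attribute *attr, char *buf)
 *	{
 *		return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.desired);
 *	}
 */
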
 933static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
 934                struct device_attribute *attr, char *buf)
 935{
 936        struct vio_dev *viodev = to_vio_dev(dev);
 937        return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
 938}
 939
 940static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
 941                struct device_attribute *attr, const char *buf, size_t count)
 942{
 943        struct vio_dev *viodev = to_vio_dev(dev);
 944        atomic_set(&viodev->cmo.allocs_failed, 0);
 945        return count;
 946}
 947
 948static ssize_t viodev_cmo_desired_set(struct device *dev,
 949                struct device_attribute *attr, const char *buf, size_t count)
 950{
 951        struct vio_dev *viodev = to_vio_dev(dev);
 952        size_t new_desired;
 953        int ret;
 954
 955        ret = strict_strtoul(buf, 10, &new_desired);
 956        if (ret)
 957                return ret;
 958
 959        vio_cmo_set_dev_desired(viodev, new_desired);
 960        return count;
 961}
 962
 963viodev_cmo_rd_attr(desired);
 964viodev_cmo_rd_attr(entitled);
 965viodev_cmo_rd_attr(allocated);
 966
 967static ssize_t name_show(struct device *, struct device_attribute *, char *);
 968static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
 969static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 970                             char *buf);
 971static struct device_attribute vio_cmo_dev_attrs[] = {
 972        __ATTR_RO(name),
 973        __ATTR_RO(devspec),
 974        __ATTR_RO(modalias),
 975        __ATTR(cmo_desired,       S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
 976               viodev_cmo_desired_show, viodev_cmo_desired_set),
 977        __ATTR(cmo_entitled,      S_IRUGO, viodev_cmo_entitled_show,      NULL),
 978        __ATTR(cmo_allocated,     S_IRUGO, viodev_cmo_allocated_show,     NULL),
 979        __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
 980               viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
 981        __ATTR_NULL
 982};
 983
 984/* sysfs bus functions and data structures for CMO */
 985
 986#define viobus_cmo_rd_attr(name)                                        \
 987static ssize_t                                                          \
 988viobus_cmo_##name##_show(struct bus_type *bt, char *buf)                \
 989{                                                                       \
 990        return sprintf(buf, "%lu\n", vio_cmo.name);                     \
 991}
 992
 993#define viobus_cmo_pool_rd_attr(name, var)                              \
 994static ssize_t                                                          \
 995viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf)     \
 996{                                                                       \
 997        return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
 998}
 999
1000static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
1001                                     size_t count)
1002{
1003        unsigned long flags;
1004
1005        spin_lock_irqsave(&vio_cmo.lock, flags);
1006        vio_cmo.high = vio_cmo.curr;
1007        spin_unlock_irqrestore(&vio_cmo.lock, flags);
1008
1009        return count;
1010}
1011
1012viobus_cmo_rd_attr(entitled);
1013viobus_cmo_pool_rd_attr(reserve, size);
1014viobus_cmo_pool_rd_attr(excess, size);
1015viobus_cmo_pool_rd_attr(excess, free);
1016viobus_cmo_rd_attr(spare);
1017viobus_cmo_rd_attr(min);
1018viobus_cmo_rd_attr(desired);
1019viobus_cmo_rd_attr(curr);
1020viobus_cmo_rd_attr(high);
1021
1022static struct bus_attribute vio_cmo_bus_attrs[] = {
1023        __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
1024        __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
1025        __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
1026        __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
1027        __ATTR(cmo_spare,   S_IRUGO, viobus_cmo_spare_show,   NULL),
1028        __ATTR(cmo_min,     S_IRUGO, viobus_cmo_min_show,     NULL),
1029        __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
1030        __ATTR(cmo_curr,    S_IRUGO, viobus_cmo_curr_show,    NULL),
1031        __ATTR(cmo_high,    S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
1032               viobus_cmo_high_show, viobus_cmo_high_reset),
1033        __ATTR_NULL
1034};
1035
1036static void vio_cmo_sysfs_init(void)
1037{
1038        vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
1039        vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
1040}
1041#else /* CONFIG_PPC_SMLPAR */
1042int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1043void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1044static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
1045static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
1046static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
1047static void vio_cmo_bus_init(void) {}
1048static void vio_cmo_sysfs_init(void) { }
1049#endif /* CONFIG_PPC_SMLPAR */
1050EXPORT_SYMBOL(vio_cmo_entitlement_update);
1051EXPORT_SYMBOL(vio_cmo_set_dev_desired);
1052
1053static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
1054{
1055        const unsigned char *dma_window;
1056        struct iommu_table *tbl;
1057        unsigned long offset, size;
1058
1059        dma_window = of_get_property(dev->dev.of_node,
1060                                  "ibm,my-dma-window", NULL);
1061        if (!dma_window)
1062                return NULL;
1063
1064        tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
1065        if (tbl == NULL)
1066                return NULL;
1067
1068        of_parse_dma_window(dev->dev.of_node, dma_window,
1069                            &tbl->it_index, &offset, &size);
1070
1071        /* TCE table size - measured in tce entries */
1072        tbl->it_size = size >> IOMMU_PAGE_SHIFT;
1073        /* offset for VIO should always be 0 */
1074        tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
1075        tbl->it_busno = 0;
1076        tbl->it_type = TCE_VB;
1077        tbl->it_blocksize = 16;
1078
1079        return iommu_init_table(tbl, -1);
1080}
1081
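/*
 * Sizing example for vio_build_iommu_table() above (illustrative): an
 * "ibm,my-dma-window" property describing a 256MB DMA window with the
 * usual 4K IOMMU pages yields
 * it_size = 0x10000000 >> IOMMU_PAGE_SHIFT = 65536 TCE entries, with
 * it_offset normally 0 for VIO devices.
 */
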
1082/**
1083 * vio_match_device: - Tell if a VIO device has a matching
1084 *                      VIO device id structure.
1085 * @ids:        array of VIO device id structures to search in
1086 * @dev:        the VIO device structure to match against
1087 *
1088 * Used by a driver to check whether a VIO device present in the
1089 * system is in its list of supported devices. Returns the matching
1090 * vio_device_id structure or NULL if there is no match.
1091 */
1092static const struct vio_device_id *vio_match_device(
1093                const struct vio_device_id *ids, const struct vio_dev *dev)
1094{
1095        while (ids->type[0] != '\0') {
1096                if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
1097                    of_device_is_compatible(dev->dev.of_node,
1098                                         ids->compat))
1099                        return ids;
1100                ids++;
1101        }
1102        return NULL;
1103}
1104
1105/*
1106 * Convert from struct device to struct vio_dev and pass to driver.
1107 * dev->driver has already been set by generic code because vio_bus_match
1108 * succeeded.
1109 */
1110static int vio_bus_probe(struct device *dev)
1111{
1112        struct vio_dev *viodev = to_vio_dev(dev);
1113        struct vio_driver *viodrv = to_vio_driver(dev->driver);
1114        const struct vio_device_id *id;
1115        int error = -ENODEV;
1116
1117        if (!viodrv->probe)
1118                return error;
1119
1120        id = vio_match_device(viodrv->id_table, viodev);
1121        if (id) {
1122                memset(&viodev->cmo, 0, sizeof(viodev->cmo));
1123                if (firmware_has_feature(FW_FEATURE_CMO)) {
1124                        error = vio_cmo_bus_probe(viodev);
1125                        if (error)
1126                                return error;
1127                }
1128                error = viodrv->probe(viodev, id);
1129                if (error && firmware_has_feature(FW_FEATURE_CMO))
1130                        vio_cmo_bus_remove(viodev);
1131        }
1132
1133        return error;
1134}
1135
1136/* convert from struct device to struct vio_dev and pass to driver. */
1137static int vio_bus_remove(struct device *dev)
1138{
1139        struct vio_dev *viodev = to_vio_dev(dev);
1140        struct vio_driver *viodrv = to_vio_driver(dev->driver);
1141        struct device *devptr;
1142        int ret = 1;
1143
1144        /*
1145         * Hold a reference to the device after the remove function is called
1146         * to allow for CMO accounting cleanup for the device.
1147         */
1148        devptr = get_device(dev);
1149
1150        if (viodrv->remove)
1151                ret = viodrv->remove(viodev);
1152
1153        if (!ret && firmware_has_feature(FW_FEATURE_CMO))
1154                vio_cmo_bus_remove(viodev);
1155
1156        put_device(devptr);
1157        return ret;
1158}
1159
1160/**
1161 * vio_register_driver: - Register a new vio driver
 1162 * @viodrv:     The vio_driver structure to be registered.
1163 */
1164int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
1165                          const char *mod_name)
1166{
1167        pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
1168
1169        /* fill in 'struct driver' fields */
1170        viodrv->driver.name = viodrv->name;
1171        viodrv->driver.pm = viodrv->pm;
1172        viodrv->driver.bus = &vio_bus_type;
1173        viodrv->driver.owner = owner;
1174        viodrv->driver.mod_name = mod_name;
1175
1176        return driver_register(&viodrv->driver);
1177}
1178EXPORT_SYMBOL(__vio_register_driver);
1179
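/*
 * A minimal registration sketch for __vio_register_driver() above
 * (hypothetical driver; the id values are modelled on existing VIO
 * drivers such as ibmveth).  Drivers normally use the
 * vio_register_driver() wrapper from asm/vio.h, which fills in
 * THIS_MODULE and KBUILD_MODNAME, rather than calling this directly:
 *
 *	static struct vio_device_id example_ids[] = {
 *		{ "network", "IBM,l-lan" },
 *		{ "", "" }
 *	};
 *
 *	static struct vio_driver example_driver = {
 *		.id_table = example_ids,
 *		.probe    = example_probe,
 *		.remove   = example_remove,
 *		.name     = "example",
 *	};
 *
 *	rc = vio_register_driver(&example_driver);
 */
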
1180/**
1181 * vio_unregister_driver - Remove registration of vio driver.
 1182 * @viodrv:     The vio_driver struct to be removed from registration
1183 */
1184void vio_unregister_driver(struct vio_driver *viodrv)
1185{
1186        driver_unregister(&viodrv->driver);
1187}
1188EXPORT_SYMBOL(vio_unregister_driver);
1189
1190/* vio_dev refcount hit 0 */
1191static void __devinit vio_dev_release(struct device *dev)
1192{
1193        struct iommu_table *tbl = get_iommu_table_base(dev);
1194
1195        if (tbl)
1196                iommu_free_table(tbl, dev->of_node ?
1197                        dev->of_node->full_name : dev_name(dev));
1198        of_node_put(dev->of_node);
1199        kfree(to_vio_dev(dev));
1200}
1201
1202/**
1203 * vio_register_device_node: - Register a new vio device.
1204 * @of_node:    The OF node for this device.
1205 *
1206 * Creates and initializes a vio_dev structure from the data in
1207 * of_node and adds it to the list of virtual devices.
 1208 * Returns a pointer to the created vio_dev or NULL if the node is
 1209 * missing a device_type or a "reg" property.
1210 */
1211struct vio_dev *vio_register_device_node(struct device_node *of_node)
1212{
1213        struct vio_dev *viodev;
1214        const unsigned int *unit_address;
1215
1216        /* we need the 'device_type' property, in order to match with drivers */
1217        if (of_node->type == NULL) {
1218                printk(KERN_WARNING "%s: node %s missing 'device_type'\n",
1219                                __func__,
1220                                of_node->name ? of_node->name : "<unknown>");
1221                return NULL;
1222        }
1223
1224        unit_address = of_get_property(of_node, "reg", NULL);
1225        if (unit_address == NULL) {
1226                printk(KERN_WARNING "%s: node %s missing 'reg'\n",
1227                                __func__,
1228                                of_node->name ? of_node->name : "<unknown>");
1229                return NULL;
1230        }
1231
1232        /* allocate a vio_dev for this node */
1233        viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
1234        if (viodev == NULL)
1235                return NULL;
1236
1237        viodev->irq = irq_of_parse_and_map(of_node, 0);
1238
1239        dev_set_name(&viodev->dev, "%x", *unit_address);
1240        viodev->name = of_node->name;
1241        viodev->type = of_node->type;
1242        viodev->unit_address = *unit_address;
1243        viodev->dev.of_node = of_node_get(of_node);
1244
1245        if (firmware_has_feature(FW_FEATURE_CMO))
1246                vio_cmo_set_dma_ops(viodev);
1247        else
1248                set_dma_ops(&viodev->dev, &dma_iommu_ops);
1249        set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
1250        set_dev_node(&viodev->dev, of_node_to_nid(of_node));
1251
1252        /* init generic 'struct device' fields: */
1253        viodev->dev.parent = &vio_bus_device.dev;
1254        viodev->dev.bus = &vio_bus_type;
1255        viodev->dev.release = vio_dev_release;
1256        /* needed to ensure proper operation of coherent allocations
1257         * later, in case driver doesn't set it explicitly */
1258        dma_set_mask(&viodev->dev, DMA_BIT_MASK(64));
1259        dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64));
1260
1261        /* register with generic device framework */
1262        if (device_register(&viodev->dev)) {
1263                printk(KERN_ERR "%s: failed to register device %s\n",
1264                                __func__, dev_name(&viodev->dev));
1265                put_device(&viodev->dev);
1266                return NULL;
1267        }
1268
1269        return viodev;
1270}
1271EXPORT_SYMBOL(vio_register_device_node);
1272
1273/**
1274 * vio_bus_init: - Initialize the virtual IO bus
1275 */
1276static int __init vio_bus_init(void)
1277{
1278        int err;
1279        struct device_node *node_vroot;
1280
1281        if (firmware_has_feature(FW_FEATURE_CMO))
1282                vio_cmo_sysfs_init();
1283
1284        err = bus_register(&vio_bus_type);
1285        if (err) {
1286                printk(KERN_ERR "failed to register VIO bus\n");
1287                return err;
1288        }
1289
1290        /*
1291         * The fake parent of all vio devices, just to give us
1292         * a nice directory
1293         */
1294        err = device_register(&vio_bus_device.dev);
1295        if (err) {
1296                printk(KERN_WARNING "%s: device_register returned %i\n",
1297                                __func__, err);
1298                return err;
1299        }
1300
1301        if (firmware_has_feature(FW_FEATURE_CMO))
1302                vio_cmo_bus_init();
1303
1304        node_vroot = of_find_node_by_name(NULL, "vdevice");
1305        if (node_vroot) {
1306                struct device_node *of_node;
1307
1308                /*
1309                 * Create struct vio_devices for each virtual device in
1310                 * the device tree. Drivers will associate with them later.
1311                 */
1312                for (of_node = node_vroot->child; of_node != NULL;
1313                                of_node = of_node->sibling)
1314                        vio_register_device_node(of_node);
1315                of_node_put(node_vroot);
1316        }
1317
1318        return 0;
1319}
1320__initcall(vio_bus_init);
1321
1322static ssize_t name_show(struct device *dev,
1323                struct device_attribute *attr, char *buf)
1324{
1325        return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
1326}
1327
1328static ssize_t devspec_show(struct device *dev,
1329                struct device_attribute *attr, char *buf)
1330{
1331        struct device_node *of_node = dev->of_node;
1332
1333        return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none");
1334}
1335
1336static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1337                             char *buf)
1338{
1339        const struct vio_dev *vio_dev = to_vio_dev(dev);
1340        struct device_node *dn;
1341        const char *cp;
1342
1343        dn = dev->of_node;
1344        if (!dn)
1345                return -ENODEV;
1346        cp = of_get_property(dn, "compatible", NULL);
1347        if (!cp)
1348                return -ENODEV;
1349
1350        return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
1351}
1352
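/*
 * For a device with type "network" and a compatible string of
 * "IBM,l-lan" (the ibmveth adapter), modalias_show() above yields
 * "vio:TnetworkSIBM,l-lan", matching the MODALIAS value that
 * vio_hotplug() exports below for udev module loading.
 */
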
1353static struct device_attribute vio_dev_attrs[] = {
1354        __ATTR_RO(name),
1355        __ATTR_RO(devspec),
1356        __ATTR_RO(modalias),
1357        __ATTR_NULL
1358};
1359
1360void __devinit vio_unregister_device(struct vio_dev *viodev)
1361{
1362        device_unregister(&viodev->dev);
1363}
1364EXPORT_SYMBOL(vio_unregister_device);
1365
1366static int vio_bus_match(struct device *dev, struct device_driver *drv)
1367{
1368        const struct vio_dev *vio_dev = to_vio_dev(dev);
1369        struct vio_driver *vio_drv = to_vio_driver(drv);
1370        const struct vio_device_id *ids = vio_drv->id_table;
1371
1372        return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
1373}
1374
1375static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
1376{
1377        const struct vio_dev *vio_dev = to_vio_dev(dev);
1378        struct device_node *dn;
1379        const char *cp;
1380
1381        dn = dev->of_node;
1382        if (!dn)
1383                return -ENODEV;
1384        cp = of_get_property(dn, "compatible", NULL);
1385        if (!cp)
1386                return -ENODEV;
1387
1388        add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
1389        return 0;
1390}
1391
1392static struct bus_type vio_bus_type = {
1393        .name = "vio",
1394        .dev_attrs = vio_dev_attrs,
1395        .uevent = vio_hotplug,
1396        .match = vio_bus_match,
1397        .probe = vio_bus_probe,
1398        .remove = vio_bus_remove,
1399};
1400
1401/**
1402 * vio_get_attribute: - get attribute for virtual device
 1403 * @vdev:       The vio device whose property is returned.
1404 * @which:      The property/attribute to be extracted.
1405 * @length:     Pointer to length of returned data size (unused if NULL).
1406 *
1407 * Calls prom.c's of_get_property() to return the value of the
1408 * attribute specified by @which
 1409 */
1410const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
1411{
1412        return of_get_property(vdev->dev.of_node, which, length);
1413}
1414EXPORT_SYMBOL(vio_get_attribute);
1415
1416#ifdef CONFIG_PPC_PSERIES
1417/* vio_find_name() - internal because only vio.c knows how we formatted the
1418 * kobject name
1419 */
1420static struct vio_dev *vio_find_name(const char *name)
1421{
1422        struct device *found;
1423
1424        found = bus_find_device_by_name(&vio_bus_type, NULL, name);
1425        if (!found)
1426                return NULL;
1427
1428        return to_vio_dev(found);
1429}
1430
1431/**
1432 * vio_find_node - find an already-registered vio_dev
1433 * @vnode: device_node of the virtual device we're looking for
1434 */
1435struct vio_dev *vio_find_node(struct device_node *vnode)
1436{
1437        const uint32_t *unit_address;
1438        char kobj_name[20];
1439
1440        /* construct the kobject name from the device node */
1441        unit_address = of_get_property(vnode, "reg", NULL);
1442        if (!unit_address)
1443                return NULL;
1444        snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address);
1445
1446        return vio_find_name(kobj_name);
1447}
1448EXPORT_SYMBOL(vio_find_node);
1449
1450int vio_enable_interrupts(struct vio_dev *dev)
1451{
1452        int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
1453        if (rc != H_SUCCESS)
1454                printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
1455        return rc;
1456}
1457EXPORT_SYMBOL(vio_enable_interrupts);
1458
1459int vio_disable_interrupts(struct vio_dev *dev)
1460{
1461        int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
1462        if (rc != H_SUCCESS)
1463                printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
1464        return rc;
1465}
1466EXPORT_SYMBOL(vio_disable_interrupts);
1467#endif /* CONFIG_PPC_PSERIES */
1468