linux/arch/powerpc/platforms/pseries/vio.c
   1/*
   2 * IBM PowerPC Virtual I/O Infrastructure Support.
   3 *
   4 *    Copyright (c) 2003,2008 IBM Corp.
   5 *     Dave Engebretsen engebret@us.ibm.com
   6 *     Santiago Leon santil@us.ibm.com
   7 *     Hollis Blanchard <hollisb@us.ibm.com>
   8 *     Stephen Rothwell
   9 *     Robert Jennings <rcjenn@us.ibm.com>
  10 *
  11 *      This program is free software; you can redistribute it and/or
  12 *      modify it under the terms of the GNU General Public License
  13 *      as published by the Free Software Foundation; either version
  14 *      2 of the License, or (at your option) any later version.
  15 */
  16
  17#include <linux/cpu.h>
  18#include <linux/types.h>
  19#include <linux/delay.h>
  20#include <linux/stat.h>
  21#include <linux/device.h>
  22#include <linux/init.h>
  23#include <linux/slab.h>
  24#include <linux/console.h>
  25#include <linux/export.h>
  26#include <linux/mm.h>
  27#include <linux/dma-mapping.h>
  28#include <linux/kobject.h>
  29
  30#include <asm/iommu.h>
  31#include <asm/dma.h>
  32#include <asm/vio.h>
  33#include <asm/prom.h>
  34#include <asm/firmware.h>
  35#include <asm/tce.h>
  36#include <asm/page.h>
  37#include <asm/hvcall.h>
  38#include <asm/machdep.h>
  39
  40static struct vio_dev vio_bus_device  = { /* fake "parent" device */
  41        .name = "vio",
  42        .type = "",
  43        .dev.init_name = "vio",
  44        .dev.bus = &vio_bus_type,
  45};
  46
  47#ifdef CONFIG_PPC_SMLPAR
  48/**
  49 * vio_cmo_pool - A pool of IO memory for CMO use
  50 *
  51 * @size: The size of the pool in bytes
  52 * @free: The amount of free memory in the pool
  53 */
  54struct vio_cmo_pool {
  55        size_t size;
  56        size_t free;
  57};
  58
  59/* How many ms to delay queued balance work */
  60#define VIO_CMO_BALANCE_DELAY 100
  61
  62/* Portion out IO memory to CMO devices by this chunk size */
  63#define VIO_CMO_BALANCE_CHUNK 131072
  64
  65/**
  66 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
  67 *
   68 * @viodev: struct vio_dev pointer
  69 * @list: pointer to other devices on bus that are being tracked
  70 */
  71struct vio_cmo_dev_entry {
  72        struct vio_dev *viodev;
  73        struct list_head list;
  74};
  75
  76/**
  77 * vio_cmo - VIO bus accounting structure for CMO entitlement
  78 *
  79 * @lock: spinlock for entire structure
  80 * @balance_q: work queue for balancing system entitlement
  81 * @device_list: list of CMO-enabled devices requiring entitlement
  82 * @entitled: total system entitlement in bytes
  83 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
  84 * @excess: pool of excess entitlement not needed for device reserves or spare
  85 * @spare: IO memory for device hotplug functionality
  86 * @min: minimum necessary for system operation
  87 * @desired: desired memory for system operation
  88 * @curr: bytes currently allocated
  89 * @high: high water mark for IO data usage
  90 */
  91static struct vio_cmo {
  92        spinlock_t lock;
  93        struct delayed_work balance_q;
  94        struct list_head device_list;
  95        size_t entitled;
  96        struct vio_cmo_pool reserve;
  97        struct vio_cmo_pool excess;
  98        size_t spare;
  99        size_t min;
 100        size_t desired;
 101        size_t curr;
 102        size_t high;
 103} vio_cmo;
 104
 105/**
  106 * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
 107 */
 108static int vio_cmo_num_OF_devs(void)
 109{
 110        struct device_node *node_vroot;
 111        int count = 0;
 112
 113        /*
 114         * Count the number of vdevice entries with an
 115         * ibm,my-dma-window OF property
 116         */
 117        node_vroot = of_find_node_by_name(NULL, "vdevice");
 118        if (node_vroot) {
 119                struct device_node *of_node;
 120                struct property *prop;
 121
 122                for_each_child_of_node(node_vroot, of_node) {
 123                        prop = of_find_property(of_node, "ibm,my-dma-window",
 124                                               NULL);
 125                        if (prop)
 126                                count++;
 127                }
 128        }
 129        of_node_put(node_vroot);
 130        return count;
 131}
 132
 133/**
  134 * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
 135 *
 136 * @viodev: VIO device requesting IO memory
 137 * @size: size of allocation requested
 138 *
 139 * Allocations come from memory reserved for the devices and any excess
 140 * IO memory available to all devices.  The spare pool used to service
 141 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
 142 * made available.
 143 *
 144 * Return codes:
 145 *  0 for successful allocation and -ENOMEM for a failure
 146 */
 147static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
 148{
 149        unsigned long flags;
 150        size_t reserve_free = 0;
 151        size_t excess_free = 0;
 152        int ret = -ENOMEM;
 153
 154        spin_lock_irqsave(&vio_cmo.lock, flags);
 155
 156        /* Determine the amount of free entitlement available in reserve */
 157        if (viodev->cmo.entitled > viodev->cmo.allocated)
 158                reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
 159
  160        /* If spare is not fulfilled, the excess pool cannot be used. */
 161        if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
 162                excess_free = vio_cmo.excess.free;
 163
 164        /* The request can be satisfied */
 165        if ((reserve_free + excess_free) >= size) {
 166                vio_cmo.curr += size;
 167                if (vio_cmo.curr > vio_cmo.high)
 168                        vio_cmo.high = vio_cmo.curr;
 169                viodev->cmo.allocated += size;
 170                size -= min(reserve_free, size);
 171                vio_cmo.excess.free -= size;
 172                ret = 0;
 173        }
 174
 175        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 176        return ret;
 177}
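
/*
 * Worked example (illustrative numbers, not part of the original source):
 * with viodev->cmo.entitled = 64K and viodev->cmo.allocated = 48K, the
 * device has 16K of reserve headroom.  If vio_cmo.excess.free is 32K and
 * the spare is fully funded, a 24K request succeeds: cmo.allocated grows
 * by 24K, the first 16K is covered by the device's own entitlement and
 * only the remaining 8K is deducted from vio_cmo.excess.free.
 */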
 178
 179/**
  180 * vio_cmo_dealloc - deallocate IO memory from CMO-enabled devices
 181 * @viodev: VIO device freeing IO memory
 182 * @size: size of deallocation
 183 *
 184 * IO memory is freed by the device back to the correct memory pools.
  185 * The spare pool is replenished first from either memory pool; the
  186 * reserve pool is then used to reduce device entitlement; the excess
  187 * pool is used to increase the reserve pool toward the desired
  188 * entitlement target; and the remaining memory is returned to the pools.
 189 *
 190 */
 191static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
 192{
 193        unsigned long flags;
 194        size_t spare_needed = 0;
 195        size_t excess_freed = 0;
 196        size_t reserve_freed = size;
 197        size_t tmp;
 198        int balance = 0;
 199
 200        spin_lock_irqsave(&vio_cmo.lock, flags);
 201        vio_cmo.curr -= size;
 202
 203        /* Amount of memory freed from the excess pool */
 204        if (viodev->cmo.allocated > viodev->cmo.entitled) {
 205                excess_freed = min(reserve_freed, (viodev->cmo.allocated -
 206                                                   viodev->cmo.entitled));
 207                reserve_freed -= excess_freed;
 208        }
 209
 210        /* Remove allocation from device */
 211        viodev->cmo.allocated -= (reserve_freed + excess_freed);
 212
 213        /* Spare is a subset of the reserve pool, replenish it first. */
 214        spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
 215
 216        /*
 217         * Replenish the spare in the reserve pool from the excess pool.
 218         * This moves entitlement into the reserve pool.
 219         */
 220        if (spare_needed && excess_freed) {
 221                tmp = min(excess_freed, spare_needed);
 222                vio_cmo.excess.size -= tmp;
 223                vio_cmo.reserve.size += tmp;
 224                vio_cmo.spare += tmp;
 225                excess_freed -= tmp;
 226                spare_needed -= tmp;
 227                balance = 1;
 228        }
 229
 230        /*
 231         * Replenish the spare in the reserve pool from the reserve pool.
 232         * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
 233         * if needed, and gives it to the spare pool. The amount of used
 234         * memory in this pool does not change.
 235         */
 236        if (spare_needed && reserve_freed) {
 237                tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
 238
 239                vio_cmo.spare += tmp;
 240                viodev->cmo.entitled -= tmp;
 241                reserve_freed -= tmp;
 242                spare_needed -= tmp;
 243                balance = 1;
 244        }
 245
 246        /*
 247         * Increase the reserve pool until the desired allocation is met.
 248         * Move an allocation freed from the excess pool into the reserve
 249         * pool and schedule a balance operation.
 250         */
 251        if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
 252                tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
 253
 254                vio_cmo.excess.size -= tmp;
 255                vio_cmo.reserve.size += tmp;
 256                excess_freed -= tmp;
 257                balance = 1;
 258        }
 259
 260        /* Return memory from the excess pool to that pool */
 261        if (excess_freed)
 262                vio_cmo.excess.free += excess_freed;
 263
 264        if (balance)
 265                schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
 266        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 267}
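
/*
 * Worked example (illustrative numbers, not part of the original source):
 * a device with cmo.entitled = 64K and cmo.allocated = 80K has 16K of its
 * usage charged to the excess pool.  Freeing 32K therefore yields
 * excess_freed = 16K and reserve_freed = 16K, and cmo.allocated drops to
 * 48K.  With the spare already at VIO_CMO_MIN_ENT and the reserve pool
 * already at vio_cmo.desired, the 16K of excess_freed simply returns to
 * vio_cmo.excess.free and no rebalance is scheduled.
 */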
 268
 269/**
 270 * vio_cmo_entitlement_update - Manage system entitlement changes
 271 *
 272 * @new_entitlement: new system entitlement to attempt to accommodate
 273 *
 274 * Increases in entitlement will be used to fulfill the spare entitlement
  275 * and the rest is given to the excess pool.  Decreases, where
  276 * possible, come from the excess pool and from unused device entitlement.
  277 *
  278 * Returns: 0 on success, -ENOMEM when the change cannot be made
 279 */
 280int vio_cmo_entitlement_update(size_t new_entitlement)
 281{
 282        struct vio_dev *viodev;
 283        struct vio_cmo_dev_entry *dev_ent;
 284        unsigned long flags;
 285        size_t avail, delta, tmp;
 286
 287        spin_lock_irqsave(&vio_cmo.lock, flags);
 288
 289        /* Entitlement increases */
 290        if (new_entitlement > vio_cmo.entitled) {
 291                delta = new_entitlement - vio_cmo.entitled;
 292
 293                /* Fulfill spare allocation */
 294                if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
 295                        tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
 296                        vio_cmo.spare += tmp;
 297                        vio_cmo.reserve.size += tmp;
 298                        delta -= tmp;
 299                }
 300
 301                /* Remaining new allocation goes to the excess pool */
 302                vio_cmo.entitled += delta;
 303                vio_cmo.excess.size += delta;
 304                vio_cmo.excess.free += delta;
 305
 306                goto out;
 307        }
 308
 309        /* Entitlement decreases */
 310        delta = vio_cmo.entitled - new_entitlement;
 311        avail = vio_cmo.excess.free;
 312
 313        /*
 314         * Need to check how much unused entitlement each device can
 315         * sacrifice to fulfill entitlement change.
 316         */
 317        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 318                if (avail >= delta)
 319                        break;
 320
 321                viodev = dev_ent->viodev;
 322                if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
 323                    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
 324                                avail += viodev->cmo.entitled -
 325                                         max_t(size_t, viodev->cmo.allocated,
 326                                               VIO_CMO_MIN_ENT);
 327        }
 328
 329        if (delta <= avail) {
 330                vio_cmo.entitled -= delta;
 331
 332                /* Take entitlement from the excess pool first */
 333                tmp = min(vio_cmo.excess.free, delta);
 334                vio_cmo.excess.size -= tmp;
 335                vio_cmo.excess.free -= tmp;
 336                delta -= tmp;
 337
 338                /*
 339                 * Remove all but VIO_CMO_MIN_ENT bytes from devices
 340                 * until entitlement change is served
 341                 */
 342                list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 343                        if (!delta)
 344                                break;
 345
 346                        viodev = dev_ent->viodev;
 347                        tmp = 0;
 348                        if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
 349                            (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
 350                                tmp = viodev->cmo.entitled -
 351                                      max_t(size_t, viodev->cmo.allocated,
 352                                            VIO_CMO_MIN_ENT);
 353                        viodev->cmo.entitled -= min(tmp, delta);
 354                        delta -= min(tmp, delta);
 355                }
 356        } else {
 357                spin_unlock_irqrestore(&vio_cmo.lock, flags);
 358                return -ENOMEM;
 359        }
 360
 361out:
 362        schedule_delayed_work(&vio_cmo.balance_q, 0);
 363        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 364        return 0;
 365}
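
/*
 * Worked example (illustrative numbers, not part of the original source):
 * if new_entitlement exceeds vio_cmo.entitled by 1MB and the spare is
 * already fully funded, the entire 1MB is added to the excess pool (both
 * excess.size and excess.free grow by 1MB) and a balance pass is scheduled
 * immediately.  A decrease only succeeds if the excess pool plus the unused
 * entitlement above VIO_CMO_MIN_ENT across all devices can cover the delta;
 * otherwise -ENOMEM is returned and nothing is changed.
 */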
 366
 367/**
 368 * vio_cmo_balance - Balance entitlement among devices
 369 *
 370 * @work: work queue structure for this operation
 371 *
 372 * Any system entitlement above the minimum needed for devices, or
 373 * already allocated to devices, can be distributed to the devices.
 374 * The list of devices is iterated through to recalculate the desired
 375 * entitlement level and to determine how much entitlement above the
 376 * minimum entitlement is allocated to devices.
 377 *
 378 * Small chunks of the available entitlement are given to devices until
 379 * their requirements are fulfilled or there is no entitlement left to give.
 380 * Upon completion sizes of the reserve and excess pools are calculated.
 381 *
 382 * The system minimum entitlement level is also recalculated here.
 383 * Entitlement will be reserved for devices even after vio_bus_remove to
 384 * accommodate reloading the driver.  The OF tree is walked to count the
  385 * number of devices present, and entitlement is removed for devices
  386 * that have actually left the system after vio_bus_remove was called.
 387 */
 388static void vio_cmo_balance(struct work_struct *work)
 389{
 390        struct vio_cmo *cmo;
 391        struct vio_dev *viodev;
 392        struct vio_cmo_dev_entry *dev_ent;
 393        unsigned long flags;
 394        size_t avail = 0, level, chunk, need;
 395        int devcount = 0, fulfilled;
 396
 397        cmo = container_of(work, struct vio_cmo, balance_q.work);
 398
 399        spin_lock_irqsave(&vio_cmo.lock, flags);
 400
 401        /* Calculate minimum entitlement and fulfill spare */
 402        cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
 403        BUG_ON(cmo->min > cmo->entitled);
 404        cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
 405        cmo->min += cmo->spare;
 406        cmo->desired = cmo->min;
 407
 408        /*
 409         * Determine how much entitlement is available and reset device
 410         * entitlements
 411         */
 412        avail = cmo->entitled - cmo->spare;
 413        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 414                viodev = dev_ent->viodev;
 415                devcount++;
 416                viodev->cmo.entitled = VIO_CMO_MIN_ENT;
 417                cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
 418                avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
 419        }
 420
 421        /*
 422         * Having provided each device with the minimum entitlement, loop
 423         * over the devices portioning out the remaining entitlement
 424         * until there is nothing left.
 425         */
 426        level = VIO_CMO_MIN_ENT;
 427        while (avail) {
 428                fulfilled = 0;
 429                list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 430                        viodev = dev_ent->viodev;
 431
 432                        if (viodev->cmo.desired <= level) {
 433                                fulfilled++;
 434                                continue;
 435                        }
 436
 437                        /*
 438                         * Give the device up to VIO_CMO_BALANCE_CHUNK
 439                         * bytes of entitlement, but do not exceed the
 440                         * desired level of entitlement for the device.
 441                         */
 442                        chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
 443                        chunk = min(chunk, (viodev->cmo.desired -
 444                                            viodev->cmo.entitled));
 445                        viodev->cmo.entitled += chunk;
 446
 447                        /*
 448                         * If the memory for this entitlement increase was
 449                         * already allocated to the device it does not come
 450                         * from the available pool being portioned out.
 451                         */
  452                        need = max(viodev->cmo.allocated, viodev->cmo.entitled) -
 453                               max(viodev->cmo.allocated, level);
 454                        avail -= need;
 455
 456                }
 457                if (fulfilled == devcount)
 458                        break;
 459                level += VIO_CMO_BALANCE_CHUNK;
 460        }
 461
 462        /* Calculate new reserve and excess pool sizes */
 463        cmo->reserve.size = cmo->min;
 464        cmo->excess.free = 0;
 465        cmo->excess.size = 0;
 466        need = 0;
 467        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 468                viodev = dev_ent->viodev;
 469                /* Calculated reserve size above the minimum entitlement */
 470                if (viodev->cmo.entitled)
 471                        cmo->reserve.size += (viodev->cmo.entitled -
 472                                              VIO_CMO_MIN_ENT);
 473                /* Calculated used excess entitlement */
 474                if (viodev->cmo.allocated > viodev->cmo.entitled)
 475                        need += viodev->cmo.allocated - viodev->cmo.entitled;
 476        }
 477        cmo->excess.size = cmo->entitled - cmo->reserve.size;
 478        cmo->excess.free = cmo->excess.size - need;
 479
 480        cancel_delayed_work(to_delayed_work(work));
 481        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 482}
 483
 484static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
 485                                          dma_addr_t *dma_handle, gfp_t flag,
 486                                          unsigned long attrs)
 487{
 488        struct vio_dev *viodev = to_vio_dev(dev);
 489        void *ret;
 490
 491        if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
 492                atomic_inc(&viodev->cmo.allocs_failed);
 493                return NULL;
 494        }
 495
 496        ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
 497                                    dma_handle, dev->coherent_dma_mask, flag,
 498                                    dev_to_node(dev));
 499        if (unlikely(ret == NULL)) {
 500                vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
 501                atomic_inc(&viodev->cmo.allocs_failed);
 502        }
 503
 504        return ret;
 505}
 506
 507static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
 508                                        void *vaddr, dma_addr_t dma_handle,
 509                                        unsigned long attrs)
 510{
 511        struct vio_dev *viodev = to_vio_dev(dev);
 512
 513        iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
 514        vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
 515}
 516
 517static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
 518                                         unsigned long offset, size_t size,
 519                                         enum dma_data_direction direction,
 520                                         unsigned long attrs)
 521{
 522        struct vio_dev *viodev = to_vio_dev(dev);
 523        struct iommu_table *tbl = get_iommu_table_base(dev);
 524        dma_addr_t ret = DMA_MAPPING_ERROR;
 525
 526        if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))))
 527                goto out_fail;
 528        ret = iommu_map_page(dev, tbl, page, offset, size, device_to_mask(dev),
 529                        direction, attrs);
 530        if (unlikely(ret == DMA_MAPPING_ERROR))
 531                goto out_deallocate;
 532        return ret;
 533
 534out_deallocate:
 535        vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
 536out_fail:
 537        atomic_inc(&viodev->cmo.allocs_failed);
 538        return DMA_MAPPING_ERROR;
 539}
 540
 541static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 542                                     size_t size,
 543                                     enum dma_data_direction direction,
 544                                     unsigned long attrs)
 545{
 546        struct vio_dev *viodev = to_vio_dev(dev);
 547        struct iommu_table *tbl = get_iommu_table_base(dev);
 548
 549        iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
 550        vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
 551}
 552
 553static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 554                                int nelems, enum dma_data_direction direction,
 555                                unsigned long attrs)
 556{
 557        struct vio_dev *viodev = to_vio_dev(dev);
 558        struct iommu_table *tbl = get_iommu_table_base(dev);
 559        struct scatterlist *sgl;
 560        int ret, count;
 561        size_t alloc_size = 0;
 562
 563        for_each_sg(sglist, sgl, nelems, count)
 564                alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
 565
 566        if (vio_cmo_alloc(viodev, alloc_size))
 567                goto out_fail;
 568        ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, device_to_mask(dev),
 569                        direction, attrs);
 570        if (unlikely(!ret))
 571                goto out_deallocate;
 572
 573        for_each_sg(sglist, sgl, ret, count)
 574                alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
 575        if (alloc_size)
 576                vio_cmo_dealloc(viodev, alloc_size);
 577        return ret;
 578
 579out_deallocate:
 580        vio_cmo_dealloc(viodev, alloc_size);
 581out_fail:
 582        atomic_inc(&viodev->cmo.allocs_failed);
 583        return 0;
 584}
 585
 586static void vio_dma_iommu_unmap_sg(struct device *dev,
 587                struct scatterlist *sglist, int nelems,
 588                enum dma_data_direction direction,
 589                unsigned long attrs)
 590{
 591        struct vio_dev *viodev = to_vio_dev(dev);
 592        struct iommu_table *tbl = get_iommu_table_base(dev);
 593        struct scatterlist *sgl;
 594        size_t alloc_size = 0;
 595        int count;
 596
 597        for_each_sg(sglist, sgl, nelems, count)
 598                alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
 599
 600        ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
 601        vio_cmo_dealloc(viodev, alloc_size);
 602}
 603
 604static const struct dma_map_ops vio_dma_mapping_ops = {
 605        .alloc             = vio_dma_iommu_alloc_coherent,
 606        .free              = vio_dma_iommu_free_coherent,
 607        .map_sg            = vio_dma_iommu_map_sg,
 608        .unmap_sg          = vio_dma_iommu_unmap_sg,
 609        .map_page          = vio_dma_iommu_map_page,
 610        .unmap_page        = vio_dma_iommu_unmap_page,
 611        .dma_supported     = dma_iommu_dma_supported,
 612        .get_required_mask = dma_iommu_get_required_mask,
 613        .mmap              = dma_common_mmap,
 614        .get_sgtable       = dma_common_get_sgtable,
 615        .alloc_pages       = dma_common_alloc_pages,
 616        .free_pages        = dma_common_free_pages,
 617};
 618
 619/**
 620 * vio_cmo_set_dev_desired - Set desired entitlement for a device
 621 *
 622 * @viodev: struct vio_dev for device to alter
 623 * @desired: new desired entitlement level in bytes
 624 *
 625 * For use by devices to request a change to their entitlement at runtime or
 626 * through sysfs.  The desired entitlement level is changed and a balancing
 627 * of system resources is scheduled to run in the future.
 628 */
 629void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
 630{
 631        unsigned long flags;
 632        struct vio_cmo_dev_entry *dev_ent;
 633        int found = 0;
 634
 635        if (!firmware_has_feature(FW_FEATURE_CMO))
 636                return;
 637
 638        spin_lock_irqsave(&vio_cmo.lock, flags);
 639        if (desired < VIO_CMO_MIN_ENT)
 640                desired = VIO_CMO_MIN_ENT;
 641
 642        /*
  643         * Changes will not be made for devices not in the device list.
  644         * If a device is not in the device list, then no driver is
  645         * loaded for it and it cannot receive entitlement.
 646         */
 647        list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
 648                if (viodev == dev_ent->viodev) {
 649                        found = 1;
 650                        break;
 651                }
 652        if (!found) {
 653                spin_unlock_irqrestore(&vio_cmo.lock, flags);
 654                return;
 655        }
 656
 657        /* Increase/decrease in desired device entitlement */
 658        if (desired >= viodev->cmo.desired) {
  659                /* Just bump the bus and device values prior to a balance */
 660                vio_cmo.desired += desired - viodev->cmo.desired;
 661                viodev->cmo.desired = desired;
 662        } else {
 663                /* Decrease bus and device values for desired entitlement */
 664                vio_cmo.desired -= viodev->cmo.desired - desired;
 665                viodev->cmo.desired = desired;
 666                /*
 667                 * If less entitlement is desired than current entitlement, move
 668                 * any reserve memory in the change region to the excess pool.
 669                 */
 670                if (viodev->cmo.entitled > desired) {
 671                        vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
 672                        vio_cmo.excess.size += viodev->cmo.entitled - desired;
 673                        /*
 674                         * If entitlement moving from the reserve pool to the
 675                         * excess pool is currently unused, add to the excess
 676                         * free counter.
 677                         */
 678                        if (viodev->cmo.allocated < viodev->cmo.entitled)
 679                                vio_cmo.excess.free += viodev->cmo.entitled -
 680                                                       max(viodev->cmo.allocated, desired);
 681                        viodev->cmo.entitled = desired;
 682                }
 683        }
 684        schedule_delayed_work(&vio_cmo.balance_q, 0);
 685        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 686}
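
/*
 * Usage sketch (hypothetical driver code, not part of the original source):
 * a driver that grows its DMA footprint at runtime could request more IO
 * memory entitlement with, e.g.:
 *
 *	vio_cmo_set_dev_desired(viodev, viodev->cmo.desired + 16 * 4096);
 *
 * Values below VIO_CMO_MIN_ENT are raised to that floor, and the actual
 * redistribution happens in the scheduled vio_cmo_balance() pass.
 */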
 687
 688/**
 689 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
 690 *
  691 * @viodev: Pointer to struct vio_dev for device
  692 *
  693 * Determine the device's IO memory entitlement needs, attempting
 694 * to satisfy the system minimum entitlement at first and scheduling
 695 * a balance operation to take care of the rest at a later time.
 696 *
 697 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
 698 *          -ENOMEM when entitlement is not available for device or
 699 *          device entry.
 700 *
 701 */
 702static int vio_cmo_bus_probe(struct vio_dev *viodev)
 703{
 704        struct vio_cmo_dev_entry *dev_ent;
 705        struct device *dev = &viodev->dev;
 706        struct iommu_table *tbl;
 707        struct vio_driver *viodrv = to_vio_driver(dev->driver);
 708        unsigned long flags;
 709        size_t size;
 710        bool dma_capable = false;
 711
 712        tbl = get_iommu_table_base(dev);
 713
 714        /* A device requires entitlement if it has a DMA window property */
 715        switch (viodev->family) {
 716        case VDEVICE:
 717                if (of_get_property(viodev->dev.of_node,
 718                                        "ibm,my-dma-window", NULL))
 719                        dma_capable = true;
 720                break;
 721        case PFO:
 722                dma_capable = false;
 723                break;
 724        default:
 725                dev_warn(dev, "unknown device family: %d\n", viodev->family);
 726                BUG();
 727                break;
 728        }
 729
 730        /* Configure entitlement for the device. */
 731        if (dma_capable) {
 732                /* Check that the driver is CMO enabled and get desired DMA */
 733                if (!viodrv->get_desired_dma) {
 734                        dev_err(dev, "%s: device driver does not support CMO\n",
 735                                __func__);
 736                        return -EINVAL;
 737                }
 738
 739                viodev->cmo.desired =
 740                        IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
 741                if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
 742                        viodev->cmo.desired = VIO_CMO_MIN_ENT;
 743                size = VIO_CMO_MIN_ENT;
 744
 745                dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
 746                                  GFP_KERNEL);
 747                if (!dev_ent)
 748                        return -ENOMEM;
 749
 750                dev_ent->viodev = viodev;
 751                spin_lock_irqsave(&vio_cmo.lock, flags);
 752                list_add(&dev_ent->list, &vio_cmo.device_list);
 753        } else {
 754                viodev->cmo.desired = 0;
 755                size = 0;
 756                spin_lock_irqsave(&vio_cmo.lock, flags);
 757        }
 758
 759        /*
 760         * If the needs for vio_cmo.min have not changed since they
 761         * were last set, the number of devices in the OF tree has
 762         * been constant and the IO memory for this is already in
 763         * the reserve pool.
 764         */
 765        if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
 766                            VIO_CMO_MIN_ENT)) {
  767                /* Update desired entitlement if the device requires it */
 768                if (size)
 769                        vio_cmo.desired += (viodev->cmo.desired -
 770                                        VIO_CMO_MIN_ENT);
 771        } else {
 772                size_t tmp;
 773
 774                tmp = vio_cmo.spare + vio_cmo.excess.free;
 775                if (tmp < size) {
 776                        dev_err(dev, "%s: insufficient free "
 777                                "entitlement to add device. "
 778                                "Need %lu, have %lu\n", __func__,
  779                                size, tmp);
 780                        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 781                        return -ENOMEM;
 782                }
 783
 784                /* Use excess pool first to fulfill request */
 785                tmp = min(size, vio_cmo.excess.free);
 786                vio_cmo.excess.free -= tmp;
 787                vio_cmo.excess.size -= tmp;
 788                vio_cmo.reserve.size += tmp;
 789
 790                /* Use spare if excess pool was insufficient */
 791                vio_cmo.spare -= size - tmp;
 792
 793                /* Update bus accounting */
 794                vio_cmo.min += size;
 795                vio_cmo.desired += viodev->cmo.desired;
 796        }
 797        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 798        return 0;
 799}
 800
 801/**
 802 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
 803 *
  804 * @viodev: Pointer to struct vio_dev for device
 805 *
 806 * Remove the device from the cmo device list.  The minimum entitlement
 807 * will be reserved for the device as long as it is in the system.  The
 808 * rest of the entitlement the device had been allocated will be returned
 809 * to the system.
 810 */
 811static void vio_cmo_bus_remove(struct vio_dev *viodev)
 812{
 813        struct vio_cmo_dev_entry *dev_ent;
 814        unsigned long flags;
 815        size_t tmp;
 816
 817        spin_lock_irqsave(&vio_cmo.lock, flags);
 818        if (viodev->cmo.allocated) {
 819                dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
 820                        "allocated after remove operation.\n",
 821                        __func__, viodev->cmo.allocated);
 822                BUG();
 823        }
 824
 825        /*
 826         * Remove the device from the device list being maintained for
 827         * CMO enabled devices.
 828         */
 829        list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
 830                if (viodev == dev_ent->viodev) {
 831                        list_del(&dev_ent->list);
 832                        kfree(dev_ent);
 833                        break;
 834                }
 835
 836        /*
  837         * Devices that require no entitlement do not need to be
  838         * processed.  Otherwise, return the device's entitlement
 839         * back to the pools.
 840         */
 841        if (viodev->cmo.entitled) {
 842                /*
  843                 * This device has not yet left the OF tree; its
 844                 * minimum entitlement remains in vio_cmo.min and
 845                 * vio_cmo.desired
 846                 */
 847                vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
 848
 849                /*
 850                 * Save min allocation for device in reserve as long
 851                 * as it exists in OF tree as determined by later
 852                 * balance operation
 853                 */
 854                viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
 855
 856                /* Replenish spare from freed reserve pool */
 857                if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
 858                        tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
 859                                                         vio_cmo.spare));
 860                        vio_cmo.spare += tmp;
 861                        viodev->cmo.entitled -= tmp;
 862                }
 863
 864                /* Remaining reserve goes to excess pool */
 865                vio_cmo.excess.size += viodev->cmo.entitled;
 866                vio_cmo.excess.free += viodev->cmo.entitled;
 867                vio_cmo.reserve.size -= viodev->cmo.entitled;
 868
 869                /*
 870                 * Until the device is removed it will keep a
 871                 * minimum entitlement; this will guarantee that
  872                 * a module unload/load will succeed.
 873                 */
 874                viodev->cmo.entitled = VIO_CMO_MIN_ENT;
 875                viodev->cmo.desired = VIO_CMO_MIN_ENT;
 876                atomic_set(&viodev->cmo.allocs_failed, 0);
 877        }
 878
 879        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 880}
 881
 882static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
 883{
 884        set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
 885}
 886
 887/**
 888 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
 889 *
 890 * Set up the reserve and excess entitlement pools based on available
 891 * system entitlement and the number of devices in the OF tree that
 892 * require entitlement in the reserve pool.
 893 */
 894static void vio_cmo_bus_init(void)
 895{
 896        struct hvcall_mpp_data mpp_data;
 897        int err;
 898
 899        memset(&vio_cmo, 0, sizeof(struct vio_cmo));
 900        spin_lock_init(&vio_cmo.lock);
 901        INIT_LIST_HEAD(&vio_cmo.device_list);
 902        INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
 903
 904        /* Get current system entitlement */
 905        err = h_get_mpp(&mpp_data);
 906
 907        /*
  908         * On failure, continue with entitlement set to 0; we will
  909         * panic() later when the spare is reserved.
 910         */
 911        if (err != H_SUCCESS) {
  912                printk(KERN_ERR "%s: unable to determine system IO "
 913                       "entitlement. (%d)\n", __func__, err);
 914                vio_cmo.entitled = 0;
 915        } else {
 916                vio_cmo.entitled = mpp_data.entitled_mem;
 917        }
 918
 919        /* Set reservation and check against entitlement */
 920        vio_cmo.spare = VIO_CMO_MIN_ENT;
 921        vio_cmo.reserve.size = vio_cmo.spare;
 922        vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
 923                                 VIO_CMO_MIN_ENT);
 924        if (vio_cmo.reserve.size > vio_cmo.entitled) {
 925                printk(KERN_ERR "%s: insufficient system entitlement\n",
 926                       __func__);
 927                panic("%s: Insufficient system entitlement", __func__);
 928        }
 929
 930        /* Set the remaining accounting variables */
 931        vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
 932        vio_cmo.excess.free = vio_cmo.excess.size;
 933        vio_cmo.min = vio_cmo.reserve.size;
 934        vio_cmo.desired = vio_cmo.reserve.size;
 935}
 936
 937/* sysfs device functions and data structures for CMO */
 938
 939#define viodev_cmo_rd_attr(name)                                        \
 940static ssize_t cmo_##name##_show(struct device *dev,                    \
 941                                        struct device_attribute *attr,  \
 942                                         char *buf)                     \
 943{                                                                       \
 944        return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
 945}
 946
 947static ssize_t cmo_allocs_failed_show(struct device *dev,
 948                struct device_attribute *attr, char *buf)
 949{
 950        struct vio_dev *viodev = to_vio_dev(dev);
 951        return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
 952}
 953
 954static ssize_t cmo_allocs_failed_store(struct device *dev,
 955                struct device_attribute *attr, const char *buf, size_t count)
 956{
 957        struct vio_dev *viodev = to_vio_dev(dev);
 958        atomic_set(&viodev->cmo.allocs_failed, 0);
 959        return count;
 960}
 961
 962static ssize_t cmo_desired_store(struct device *dev,
 963                struct device_attribute *attr, const char *buf, size_t count)
 964{
 965        struct vio_dev *viodev = to_vio_dev(dev);
 966        size_t new_desired;
 967        int ret;
 968
 969        ret = kstrtoul(buf, 10, &new_desired);
 970        if (ret)
 971                return ret;
 972
 973        vio_cmo_set_dev_desired(viodev, new_desired);
 974        return count;
 975}
 976
 977viodev_cmo_rd_attr(desired);
 978viodev_cmo_rd_attr(entitled);
 979viodev_cmo_rd_attr(allocated);
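
/*
 * For reference, viodev_cmo_rd_attr(desired) above expands to
 *
 *	static ssize_t cmo_desired_show(struct device *dev,
 *					struct device_attribute *attr,
 *					char *buf)
 *	{
 *		return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.desired);
 *	}
 *
 * which pairs with cmo_desired_store() above to back the
 * DEVICE_ATTR_RW(cmo_desired) declaration below.
 */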
 980
 981static ssize_t name_show(struct device *, struct device_attribute *, char *);
 982static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
 983static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 984                             char *buf);
 985
 986static struct device_attribute dev_attr_name;
 987static struct device_attribute dev_attr_devspec;
 988static struct device_attribute dev_attr_modalias;
 989
 990static DEVICE_ATTR_RO(cmo_entitled);
 991static DEVICE_ATTR_RO(cmo_allocated);
 992static DEVICE_ATTR_RW(cmo_desired);
 993static DEVICE_ATTR_RW(cmo_allocs_failed);
 994
 995static struct attribute *vio_cmo_dev_attrs[] = {
 996        &dev_attr_name.attr,
 997        &dev_attr_devspec.attr,
 998        &dev_attr_modalias.attr,
 999        &dev_attr_cmo_entitled.attr,
1000        &dev_attr_cmo_allocated.attr,
1001        &dev_attr_cmo_desired.attr,
1002        &dev_attr_cmo_allocs_failed.attr,
1003        NULL,
1004};
1005ATTRIBUTE_GROUPS(vio_cmo_dev);
1006
1007/* sysfs bus functions and data structures for CMO */
1008
1009#define viobus_cmo_rd_attr(name)                                        \
1010static ssize_t cmo_bus_##name##_show(struct bus_type *bt, char *buf)    \
1011{                                                                       \
1012        return sprintf(buf, "%lu\n", vio_cmo.name);                     \
1013}                                                                       \
1014static struct bus_attribute bus_attr_cmo_bus_##name =                   \
1015        __ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
1016
1017#define viobus_cmo_pool_rd_attr(name, var)                              \
1018static ssize_t                                                          \
1019cmo_##name##_##var##_show(struct bus_type *bt, char *buf)               \
1020{                                                                       \
1021        return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
1022}                                                                       \
1023static BUS_ATTR_RO(cmo_##name##_##var)
1024
1025viobus_cmo_rd_attr(entitled);
1026viobus_cmo_rd_attr(spare);
1027viobus_cmo_rd_attr(min);
1028viobus_cmo_rd_attr(desired);
1029viobus_cmo_rd_attr(curr);
1030viobus_cmo_pool_rd_attr(reserve, size);
1031viobus_cmo_pool_rd_attr(excess, size);
1032viobus_cmo_pool_rd_attr(excess, free);
1033
1034static ssize_t cmo_high_show(struct bus_type *bt, char *buf)
1035{
1036        return sprintf(buf, "%lu\n", vio_cmo.high);
1037}
1038
1039static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
1040                              size_t count)
1041{
1042        unsigned long flags;
1043
1044        spin_lock_irqsave(&vio_cmo.lock, flags);
1045        vio_cmo.high = vio_cmo.curr;
1046        spin_unlock_irqrestore(&vio_cmo.lock, flags);
1047
1048        return count;
1049}
1050static BUS_ATTR_RW(cmo_high);
1051
1052static struct attribute *vio_bus_attrs[] = {
1053        &bus_attr_cmo_bus_entitled.attr,
1054        &bus_attr_cmo_bus_spare.attr,
1055        &bus_attr_cmo_bus_min.attr,
1056        &bus_attr_cmo_bus_desired.attr,
1057        &bus_attr_cmo_bus_curr.attr,
1058        &bus_attr_cmo_high.attr,
1059        &bus_attr_cmo_reserve_size.attr,
1060        &bus_attr_cmo_excess_size.attr,
1061        &bus_attr_cmo_excess_free.attr,
1062        NULL,
1063};
1064ATTRIBUTE_GROUPS(vio_bus);
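
/*
 * With these groups installed by vio_cmo_sysfs_init() below, the CMO state
 * is typically visible under /sys/bus/vio/ as the bus-level files
 * cmo_entitled, cmo_spare, cmo_min, cmo_desired, cmo_curr, cmo_reserve_size,
 * cmo_excess_size, cmo_excess_free and the resettable cmo_high, and as the
 * per-device files cmo_entitled, cmo_allocated, cmo_desired and
 * cmo_allocs_failed in each device's sysfs directory.
 */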
1065
1066static void vio_cmo_sysfs_init(void)
1067{
1068        vio_bus_type.dev_groups = vio_cmo_dev_groups;
1069        vio_bus_type.bus_groups = vio_bus_groups;
1070}
1071#else /* CONFIG_PPC_SMLPAR */
1072int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1073void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1074static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
1075static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
1076static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
1077static void vio_cmo_bus_init(void) {}
1078static void vio_cmo_sysfs_init(void) { }
1079#endif /* CONFIG_PPC_SMLPAR */
1080EXPORT_SYMBOL(vio_cmo_entitlement_update);
1081EXPORT_SYMBOL(vio_cmo_set_dev_desired);
1082
1083
1084/*
1085 * Platform Facilities Option (PFO) support
1086 */
1087
1088/**
1089 * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
1090 *
1091 * @vdev - Pointer to a struct vio_dev for device
1092 * @op - Pointer to a struct vio_pfo_op for the operation parameters
1093 *
1094 * Calls the hypervisor to synchronously perform the PFO operation
1095 * described in @op.  In the case of a busy response from the hypervisor,
1096 * the operation will be re-submitted indefinitely unless a non-zero timeout
1097 * is specified or an error occurs. The timeout places a limit on when to
 1098 * stop re-submitting an operation; the total time can be exceeded if an
1099 * operation is in progress.
1100 *
 1101 * On return, op->hcall_err holds the return code from the last
 1102 * h_cop_op call, or 0 if an error not involving the h_call was
 1103 * encountered.
1104 *
1105 * Returns:
1106 *      0 on success,
1107 *      -EINVAL if the h_call fails due to an invalid parameter,
1108 *      -E2BIG if the h_call can not be performed synchronously,
1109 *      -EBUSY if a timeout is specified and has elapsed,
1110 *      -EACCES if the memory area for data/status has been rescinded, or
1111 *      -EPERM if a hardware fault has been indicated
1112 */
1113int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
1114{
1115        struct device *dev = &vdev->dev;
1116        unsigned long deadline = 0;
1117        long hret = 0;
1118        int ret = 0;
1119
1120        if (op->timeout)
1121                deadline = jiffies + msecs_to_jiffies(op->timeout);
1122
1123        while (true) {
1124                hret = plpar_hcall_norets(H_COP, op->flags,
1125                                vdev->resource_id,
1126                                op->in, op->inlen, op->out,
1127                                op->outlen, op->csbcpb);
1128
1129                if (hret == H_SUCCESS ||
1130                    (hret != H_NOT_ENOUGH_RESOURCES &&
1131                     hret != H_BUSY && hret != H_RESOURCE) ||
 1132                    (op->timeout && time_after(jiffies, deadline)))
1133                        break;
1134
1135                dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
1136        }
1137
1138        switch (hret) {
1139        case H_SUCCESS:
1140                ret = 0;
1141                break;
1142        case H_OP_MODE:
1143        case H_TOO_BIG:
1144                ret = -E2BIG;
1145                break;
1146        case H_RESCINDED:
1147                ret = -EACCES;
1148                break;
1149        case H_HARDWARE:
1150                ret = -EPERM;
1151                break;
1152        case H_NOT_ENOUGH_RESOURCES:
1153        case H_RESOURCE:
1154        case H_BUSY:
1155                ret = -EBUSY;
1156                break;
1157        default:
1158                ret = -EINVAL;
1159                break;
1160        }
1161
1162        if (ret)
1163                dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
1164                                __func__, ret, hret);
1165
1166        op->hcall_err = hret;
1167        return ret;
1168}
1169EXPORT_SYMBOL(vio_h_cop_sync);
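
/*
 * Usage sketch (hypothetical caller, not part of the original source): a
 * PFO driver submitting a synchronous co-processor request fills in the
 * fields consumed above and checks both return values.  Buffer setup and
 * the csbcpb layout are the caller's responsibility.
 *
 *	struct vio_pfo_op op = {
 *		.flags   = 0,
 *		.in      = __pa(in_buf),
 *		.inlen   = in_len,
 *		.out     = __pa(out_buf),
 *		.outlen  = out_len,
 *		.csbcpb  = __pa(csbcpb),
 *		.timeout = 2000,	// milliseconds; 0 retries indefinitely
 *	};
 *	int rc = vio_h_cop_sync(vdev, &op);
 *	if (rc)
 *		dev_err(&vdev->dev, "H_COP failed: rc=%d hcall_err=%ld\n",
 *			rc, op.hcall_err);
 */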
1170
1171static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
1172{
1173        const __be32 *dma_window;
1174        struct iommu_table *tbl;
1175        unsigned long offset, size;
1176
1177        dma_window = of_get_property(dev->dev.of_node,
1178                                  "ibm,my-dma-window", NULL);
1179        if (!dma_window)
1180                return NULL;
1181
1182        tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
1183        if (tbl == NULL)
1184                return NULL;
1185
1186        of_parse_dma_window(dev->dev.of_node, dma_window,
1187                            &tbl->it_index, &offset, &size);
1188
1189        /* TCE table size - measured in tce entries */
1190        tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
1191        tbl->it_size = size >> tbl->it_page_shift;
1192        /* offset for VIO should always be 0 */
1193        tbl->it_offset = offset >> tbl->it_page_shift;
1194        tbl->it_busno = 0;
1195        tbl->it_type = TCE_VB;
1196        tbl->it_blocksize = 16;
1197
1198        if (firmware_has_feature(FW_FEATURE_LPAR))
1199                tbl->it_ops = &iommu_table_lpar_multi_ops;
1200        else
1201                tbl->it_ops = &iommu_table_pseries_ops;
1202
1203        return iommu_init_table(tbl, -1);
1204}
1205
1206/**
1207 * vio_match_device: - Tell if a VIO device has a matching
1208 *                      VIO device id structure.
1209 * @ids:        array of VIO device id structures to search in
1210 * @dev:        the VIO device structure to match against
1211 *
1212 * Used by a driver to check whether a VIO device present in the
1213 * system is in its list of supported devices. Returns the matching
1214 * vio_device_id structure or NULL if there is no match.
1215 */
1216static const struct vio_device_id *vio_match_device(
1217                const struct vio_device_id *ids, const struct vio_dev *dev)
1218{
1219        while (ids->type[0] != '\0') {
1220                if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
1221                    of_device_is_compatible(dev->dev.of_node,
1222                                         ids->compat))
1223                        return ids;
1224                ids++;
1225        }
1226        return NULL;
1227}
1228
1229/*
1230 * Convert from struct device to struct vio_dev and pass to driver.
1231 * dev->driver has already been set by generic code because vio_bus_match
1232 * succeeded.
1233 */
1234static int vio_bus_probe(struct device *dev)
1235{
1236        struct vio_dev *viodev = to_vio_dev(dev);
1237        struct vio_driver *viodrv = to_vio_driver(dev->driver);
1238        const struct vio_device_id *id;
1239        int error = -ENODEV;
1240
1241        if (!viodrv->probe)
1242                return error;
1243
1244        id = vio_match_device(viodrv->id_table, viodev);
1245        if (id) {
1246                memset(&viodev->cmo, 0, sizeof(viodev->cmo));
1247                if (firmware_has_feature(FW_FEATURE_CMO)) {
1248                        error = vio_cmo_bus_probe(viodev);
1249                        if (error)
1250                                return error;
1251                }
1252                error = viodrv->probe(viodev, id);
1253                if (error && firmware_has_feature(FW_FEATURE_CMO))
1254                        vio_cmo_bus_remove(viodev);
1255        }
1256
1257        return error;
1258}
1259
1260/* convert from struct device to struct vio_dev and pass to driver. */
1261static int vio_bus_remove(struct device *dev)
1262{
1263        struct vio_dev *viodev = to_vio_dev(dev);
1264        struct vio_driver *viodrv = to_vio_driver(dev->driver);
1265        struct device *devptr;
1266
1267        /*
1268         * Hold a reference to the device after the remove function is called
1269         * to allow for CMO accounting cleanup for the device.
1270         */
1271        devptr = get_device(dev);
1272
1273        if (viodrv->remove)
1274                viodrv->remove(viodev);
1275
1276        if (firmware_has_feature(FW_FEATURE_CMO))
1277                vio_cmo_bus_remove(viodev);
1278
1279        put_device(devptr);
1280        return 0;
1281}
1282
1283/**
1284 * vio_register_driver: - Register a new vio driver
1285 * @viodrv:     The vio_driver structure to be registered.
1286 */
1287int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
1288                          const char *mod_name)
1289{
1290        // vio_bus_type is only initialised for pseries
1291        if (!machine_is(pseries))
1292                return -ENODEV;
1293
1294        pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
1295
1296        /* fill in 'struct driver' fields */
1297        viodrv->driver.name = viodrv->name;
1298        viodrv->driver.pm = viodrv->pm;
1299        viodrv->driver.bus = &vio_bus_type;
1300        viodrv->driver.owner = owner;
1301        viodrv->driver.mod_name = mod_name;
1302
1303        return driver_register(&viodrv->driver);
1304}
1305EXPORT_SYMBOL(__vio_register_driver);
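
/*
 * Registration sketch (hypothetical driver, not part of the original
 * source): callers normally use the vio_register_driver() wrapper referred
 * to in the kernel-doc above rather than __vio_register_driver() directly.
 * The id table entries are matched by vio_match_device() against the node's
 * device_type and "compatible" strings; the names below are placeholders.
 *
 *	static const struct vio_device_id example_ids[] = {
 *		{ "network", "IBM,example" },
 *		{ "", "" }
 *	};
 *
 *	static struct vio_driver example_driver = {
 *		.name     = "example",
 *		.id_table = example_ids,
 *		.probe    = example_probe,
 *		.remove   = example_remove,
 *	};
 *
 *	// module init:  rc = vio_register_driver(&example_driver);
 *	// module exit:  vio_unregister_driver(&example_driver);
 */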
1306
1307/**
1308 * vio_unregister_driver - Remove registration of vio driver.
 1309 * @viodrv:     The vio_driver struct to be removed from registration
1310 */
1311void vio_unregister_driver(struct vio_driver *viodrv)
1312{
1313        driver_unregister(&viodrv->driver);
1314}
1315EXPORT_SYMBOL(vio_unregister_driver);
1316
1317/* vio_dev refcount hit 0 */
1318static void vio_dev_release(struct device *dev)
1319{
1320        struct iommu_table *tbl = get_iommu_table_base(dev);
1321
1322        if (tbl)
1323                iommu_tce_table_put(tbl);
1324        of_node_put(dev->of_node);
1325        kfree(to_vio_dev(dev));
1326}
1327
1328/**
1329 * vio_register_device_node: - Register a new vio device.
1330 * @of_node:    The OF node for this device.
1331 *
1332 * Creates and initializes a vio_dev structure from the data in
1333 * of_node and adds it to the list of virtual devices.
1334 * Returns a pointer to the created vio_dev or NULL if node has
1335 * NULL device_type or compatible fields.
1336 */
1337struct vio_dev *vio_register_device_node(struct device_node *of_node)
1338{
1339        struct vio_dev *viodev;
1340        struct device_node *parent_node;
1341        const __be32 *prop;
1342        enum vio_dev_family family;
1343        const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
1344
1345        /*
 1346         * Determine if this node is under the /vdevice node or under the
1347         * /ibm,platform-facilities node.  This decides the device's family.
1348         */
1349        parent_node = of_get_parent(of_node);
1350        if (parent_node) {
1351                if (!strcmp(parent_node->type, "ibm,platform-facilities"))
1352                        family = PFO;
1353                else if (!strcmp(parent_node->type, "vdevice"))
1354                        family = VDEVICE;
1355                else {
1356                        pr_warn("%s: parent(%pOF) of %s not recognized.\n",
1357                                        __func__,
1358                                        parent_node,
1359                                        of_node_name);
1360                        of_node_put(parent_node);
1361                        return NULL;
1362                }
1363                of_node_put(parent_node);
1364        } else {
1365                pr_warn("%s: could not determine the parent of node %s.\n",
1366                                __func__, of_node_name);
1367                return NULL;
1368        }
1369
1370        if (family == PFO) {
1371                if (of_get_property(of_node, "interrupt-controller", NULL)) {
1372                        pr_debug("%s: Skipping the interrupt controller %s.\n",
1373                                        __func__, of_node_name);
1374                        return NULL;
1375                }
1376        }
1377
1378        /* allocate a vio_dev for this node */
1379        viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
1380        if (viodev == NULL) {
1381                pr_warn("%s: allocation failure for VIO device.\n", __func__);
1382                return NULL;
1383        }
1384
1385        /* we need the 'device_type' property, in order to match with drivers */
1386        viodev->family = family;
1387        if (viodev->family == VDEVICE) {
1388                unsigned int unit_address;
1389
1390                if (of_node->type != NULL)
1391                        viodev->type = of_node->type;
1392                else {
1393                        pr_warn("%s: node %s is missing the 'device_type' "
1394                                        "property.\n", __func__, of_node_name);
1395                        goto out;
1396                }
1397
1398                prop = of_get_property(of_node, "reg", NULL);
1399                if (prop == NULL) {
1400                        pr_warn("%s: node %s missing 'reg'\n",
1401                                        __func__, of_node_name);
1402                        goto out;
1403                }
1404                unit_address = of_read_number(prop, 1);
1405                dev_set_name(&viodev->dev, "%x", unit_address);
1406                viodev->irq = irq_of_parse_and_map(of_node, 0);
1407                viodev->unit_address = unit_address;
1408        } else {
1409        /* PFO devices need their resource_id for submitting COP_OPs.
1410         * This is an optional field for devices, but is required when
1411         * performing synchronous ops. */
1412                prop = of_get_property(of_node, "ibm,resource-id", NULL);
1413                if (prop != NULL)
1414                        viodev->resource_id = of_read_number(prop, 1);
1415
1416                dev_set_name(&viodev->dev, "%s", of_node_name);
1417                viodev->type = of_node_name;
1418                viodev->irq = 0;
1419        }
1420
1421        viodev->name = of_node->name;
1422        viodev->dev.of_node = of_node_get(of_node);
1423
1424        set_dev_node(&viodev->dev, of_node_to_nid(of_node));
1425
1426        /* init generic 'struct device' fields: */
1427        viodev->dev.parent = &vio_bus_device.dev;
1428        viodev->dev.bus = &vio_bus_type;
1429        viodev->dev.release = vio_dev_release;
1430
1431        if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) {
1432                if (firmware_has_feature(FW_FEATURE_CMO))
1433                        vio_cmo_set_dma_ops(viodev);
1434                else
1435                        set_dma_ops(&viodev->dev, &dma_iommu_ops);
1436
1437                set_iommu_table_base(&viodev->dev,
1438                                     vio_build_iommu_table(viodev));
1439
1440                /* needed to ensure proper operation of coherent allocations
1441                 * later, in case the driver doesn't set it explicitly */
1442                viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
1443                viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
1444        }
1445
1446        /* register with generic device framework */
1447        if (device_register(&viodev->dev)) {
1448                printk(KERN_ERR "%s: failed to register device %s\n",
1449                                __func__, dev_name(&viodev->dev));
1450                put_device(&viodev->dev);
1451                return NULL;
1452        }
1453
1454        return viodev;
1455
1456out:    /* Use this exit point for any return prior to device_register */
1457        kfree(viodev);
1458
1459        return NULL;
1460}
1461EXPORT_SYMBOL(vio_register_device_node);
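/*
 * Example (illustrative sketch): a DLPAR-style hotplug add path registers a
 * freshly added device-tree node roughly as below.  The helper name
 * example_add_vio_node() is hypothetical; only vio_register_device_node()
 * and its NULL-on-failure convention come from this file.
 */
static int example_add_vio_node(struct device_node *dn)
{
        struct vio_dev *viodev;

        /* dn is expected to sit under /vdevice or /ibm,platform-facilities */
        viodev = vio_register_device_node(dn);
        if (!viodev)
                return -ENODEV;

        /* the device is now owned by the driver core / vio bus */
        return 0;
}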
1462
1463/**
1464 * vio_bus_scan_register_devices - Scan OF and register each child device
1465 * @root_name: OF node name for the root of the subtree to search.
1466 *             This must be non-NULL.
1467 *
1468 * Starting from the root node provided, register the device node for
1469 * each child beneath the root.
1470 */
1471static void vio_bus_scan_register_devices(char *root_name)
1472{
1473        struct device_node *node_root, *node_child;
1474
1475        if (!root_name)
1476                return;
1477
1478        node_root = of_find_node_by_name(NULL, root_name);
1479        if (node_root) {
1480
1481                /*
1482                 * Create struct vio_devices for each virtual device in
1483                 * the device tree. Drivers will associate with them later.
1484                 */
1485                node_child = of_get_next_child(node_root, NULL);
1486                while (node_child) {
1487                        vio_register_device_node(node_child);
1488                        node_child = of_get_next_child(node_root, node_child);
1489                }
1490                of_node_put(node_root);
1491        }
1492}
1493
1494/**
1495 * vio_bus_init: - Initialize the virtual IO bus
1496 */
1497static int __init vio_bus_init(void)
1498{
1499        int err;
1500
1501        if (firmware_has_feature(FW_FEATURE_CMO))
1502                vio_cmo_sysfs_init();
1503
1504        err = bus_register(&vio_bus_type);
1505        if (err) {
1506                printk(KERN_ERR "failed to register VIO bus\n");
1507                return err;
1508        }
1509
1510        /*
1511         * The fake parent of all vio devices, just to give us
1512         * a nice directory
1513         */
1514        err = device_register(&vio_bus_device.dev);
1515        if (err) {
1516                printk(KERN_WARNING "%s: device_register returned %i\n",
1517                                __func__, err);
1518                return err;
1519        }
1520
1521        if (firmware_has_feature(FW_FEATURE_CMO))
1522                vio_cmo_bus_init();
1523
1524        return 0;
1525}
1526machine_postcore_initcall(pseries, vio_bus_init);
1527
1528static int __init vio_device_init(void)
1529{
1530        vio_bus_scan_register_devices("vdevice");
1531        vio_bus_scan_register_devices("ibm,platform-facilities");
1532
1533        return 0;
1534}
1535machine_device_initcall(pseries, vio_device_init);
1536
1537static ssize_t name_show(struct device *dev,
1538                struct device_attribute *attr, char *buf)
1539{
1540        return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
1541}
1542static DEVICE_ATTR_RO(name);
1543
1544static ssize_t devspec_show(struct device *dev,
1545                struct device_attribute *attr, char *buf)
1546{
1547        struct device_node *of_node = dev->of_node;
1548
1549        return sprintf(buf, "%pOF\n", of_node);
1550}
1551static DEVICE_ATTR_RO(devspec);
1552
1553static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1554                             char *buf)
1555{
1556        const struct vio_dev *vio_dev = to_vio_dev(dev);
1557        struct device_node *dn;
1558        const char *cp;
1559
1560        dn = dev->of_node;
1561        if (!dn) {
1562                strcpy(buf, "\n");
1563                return strlen(buf);
1564        }
1565        cp = of_get_property(dn, "compatible", NULL);
1566        if (!cp) {
1567                strcpy(buf, "\n");
1568                return strlen(buf);
1569        }
1570
1571        return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
1572}
1573static DEVICE_ATTR_RO(modalias);
1574
1575static struct attribute *vio_dev_attrs[] = {
1576        &dev_attr_name.attr,
1577        &dev_attr_devspec.attr,
1578        &dev_attr_modalias.attr,
1579        NULL,
1580};
1581ATTRIBUTE_GROUPS(vio_dev);
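/*
 * Example (illustrative): these attributes surface under
 * /sys/bus/vio/devices/<device name>/.  The unit address and values below
 * are made up for illustration, not taken from a real machine:
 *
 *   # cat /sys/bus/vio/devices/30000002/name
 *   v-scsi
 *   # cat /sys/bus/vio/devices/30000002/devspec
 *   /vdevice/v-scsi@30000002
 *   # cat /sys/bus/vio/devices/30000002/modalias
 *   vio:TvscsiSIBM,v-scsi
 */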
1582
1583void vio_unregister_device(struct vio_dev *viodev)
1584{
1585        device_unregister(&viodev->dev);
1586        if (viodev->family == VDEVICE)
1587                irq_dispose_mapping(viodev->irq);
1588}
1589EXPORT_SYMBOL(vio_unregister_device);
1590
1591static int vio_bus_match(struct device *dev, struct device_driver *drv)
1592{
1593        const struct vio_dev *vio_dev = to_vio_dev(dev);
1594        struct vio_driver *vio_drv = to_vio_driver(drv);
1595        const struct vio_device_id *ids = vio_drv->id_table;
1596
1597        return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
1598}
1599
1600static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
1601{
1602        const struct vio_dev *vio_dev = to_vio_dev(dev);
1603        struct device_node *dn;
1604        const char *cp;
1605
1606        dn = dev->of_node;
1607        if (!dn)
1608                return -ENODEV;
1609        cp = of_get_property(dn, "compatible", NULL);
1610        if (!cp)
1611                return -ENODEV;
1612
1613        add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
1614        return 0;
1615}
1616
1617struct bus_type vio_bus_type = {
1618        .name = "vio",
1619        .dev_groups = vio_dev_groups,
1620        .uevent = vio_hotplug,
1621        .match = vio_bus_match,
1622        .probe = vio_bus_probe,
1623        .remove = vio_bus_remove,
1624};
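/*
 * Example (illustrative sketch): a client driver matches against this bus by
 * providing a table of (device_type, compatible) pairs and registering a
 * struct vio_driver via vio_register_driver() from <asm/vio.h>.  The names
 * example_vio_* below are hypothetical; the id values mirror the
 * "vio:T<type>S<compat>" modalias format emitted by vio_hotplug() above.
 */
static const struct vio_device_id example_vio_ids[] = {
        { "vscsi", "IBM,v-scsi" },      /* device_type, compatible */
        { "", "" },
};

static int example_vio_probe(struct vio_dev *vdev,
                             const struct vio_device_id *id)
{
        dev_info(&vdev->dev, "bound to %s (%s)\n", id->type, id->compat);
        return 0;
}

static struct vio_driver example_vio_driver = {
        .name           = "example_vio",
        .id_table       = example_vio_ids,
        .probe          = example_vio_probe,
};
/* a module would then call vio_register_driver(&example_vio_driver) */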
1625
1626/**
1627 * vio_get_attribute: - get attribute for virtual device
1628 * @vdev:       The vio device to get the property from.
1629 * @which:      The property/attribute to be extracted.
1630 * @length:     Pointer to length of returned data size (unused if NULL).
1631 *
1632 * Calls of_get_property() to return the value of the
1633 * attribute specified by @which.
1634 */
1635const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
1636{
1637        return of_get_property(vdev->dev.of_node, which, length);
1638}
1639EXPORT_SYMBOL(vio_get_attribute);
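/*
 * Example (illustrative sketch): drivers read device-tree properties of
 * their vio_dev through this helper.  "ibm,my-dma-window" is a property
 * carried by DMA-capable /vdevice nodes; the wrapper name and the way the
 * result is consumed here are hypothetical.
 */
static bool example_has_dma_window(struct vio_dev *vdev)
{
        int len = 0;
        const void *window = vio_get_attribute(vdev, "ibm,my-dma-window", &len);

        return window != NULL && len > 0;
}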
1640
1641#ifdef CONFIG_PPC_PSERIES
1642/* vio_find_name() - internal because only vio.c knows how we formatted the
1643 * kobject name
1644 */
1645static struct vio_dev *vio_find_name(const char *name)
1646{
1647        struct device *found;
1648
1649        found = bus_find_device_by_name(&vio_bus_type, NULL, name);
1650        if (!found)
1651                return NULL;
1652
1653        return to_vio_dev(found);
1654}
1655
1656/**
1657 * vio_find_node - find an already-registered vio_dev
1658 * @vnode: device_node of the virtual device we're looking for
1659 *
1660 * Takes a reference to the embedded struct device which needs to be dropped
1661 * after use.
1662 */
1663struct vio_dev *vio_find_node(struct device_node *vnode)
1664{
1665        char kobj_name[20];
1666        struct device_node *vnode_parent;
1667        const char *dev_type;
1668
1669        vnode_parent = of_get_parent(vnode);
1670        if (!vnode_parent)
1671                return NULL;
1672
1673        dev_type = of_get_property(vnode_parent, "device_type", NULL);
1674        of_node_put(vnode_parent);
1675        if (!dev_type)
1676                return NULL;
1677
1678        /* construct the kobject name from the device node */
1679        if (!strcmp(dev_type, "vdevice")) {
1680                const __be32 *prop;
1681
1682                prop = of_get_property(vnode, "reg", NULL);
1683                if (!prop)
1684                        return NULL;
1685                snprintf(kobj_name, sizeof(kobj_name), "%x",
1686                         (uint32_t)of_read_number(prop, 1));
1687        } else if (!strcmp(dev_type, "ibm,platform-facilities"))
1688                snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name);
1689        else
1690                return NULL;
1691
1692        return vio_find_name(kobj_name);
1693}
1694EXPORT_SYMBOL(vio_find_node);
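/*
 * Example (illustrative sketch): hotplug removal paths look up the
 * registered vio_dev for a node and must drop the reference taken by
 * vio_find_node().  The wrapper name below is hypothetical.
 */
static int example_remove_vio_node(struct device_node *dn)
{
        struct vio_dev *vdev = vio_find_node(dn);

        if (!vdev)
                return -ENODEV;

        vio_unregister_device(vdev);
        put_device(&vdev->dev);         /* matches vio_find_node()'s get */
        return 0;
}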
1695
1696int vio_enable_interrupts(struct vio_dev *dev)
1697{
1698        int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
1699        if (rc != H_SUCCESS)
1700                printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
1701        return rc;
1702}
1703EXPORT_SYMBOL(vio_enable_interrupts);
1704
1705int vio_disable_interrupts(struct vio_dev *dev)
1706{
1707        int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
1708        if (rc != H_SUCCESS)
1709                printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
1710        return rc;
1711}
1712EXPORT_SYMBOL(vio_disable_interrupts);
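/*
 * Example (illustrative sketch): CRQ-based client drivers commonly disable
 * VIO signalling while draining their queue and re-enable it afterwards.
 * The handler below is hypothetical and assumes <linux/interrupt.h>.
 */
static irqreturn_t example_vio_irq(int irq, void *data)
{
        struct vio_dev *vdev = data;

        vio_disable_interrupts(vdev);
        /* ... process queued events here ... */
        vio_enable_interrupts(vdev);

        return IRQ_HANDLED;
}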
1713#endif /* CONFIG_PPC_PSERIES */
1714
1715static int __init vio_init(void)
1716{
1717        dma_debug_add_bus(&vio_bus_type);
1718        return 0;
1719}
1720machine_fs_initcall(pseries, vio_init);
1721