linux/arch/powerpc/platforms/pseries/vio.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * IBM PowerPC Virtual I/O Infrastructure Support.
   4 *
   5 *    Copyright (c) 2003,2008 IBM Corp.
   6 *     Dave Engebretsen engebret@us.ibm.com
   7 *     Santiago Leon santil@us.ibm.com
   8 *     Hollis Blanchard <hollisb@us.ibm.com>
   9 *     Stephen Rothwell
  10 *     Robert Jennings <rcjenn@us.ibm.com>
  11 */
  12
  13#include <linux/cpu.h>
  14#include <linux/types.h>
  15#include <linux/delay.h>
  16#include <linux/stat.h>
  17#include <linux/device.h>
  18#include <linux/init.h>
  19#include <linux/slab.h>
  20#include <linux/console.h>
  21#include <linux/export.h>
  22#include <linux/mm.h>
  23#include <linux/dma-mapping.h>
  24#include <linux/kobject.h>
  25
  26#include <asm/iommu.h>
  27#include <asm/dma.h>
  28#include <asm/vio.h>
  29#include <asm/prom.h>
  30#include <asm/firmware.h>
  31#include <asm/tce.h>
  32#include <asm/page.h>
  33#include <asm/hvcall.h>
  34
  35static struct vio_dev vio_bus_device  = { /* fake "parent" device */
  36        .name = "vio",
  37        .type = "",
  38        .dev.init_name = "vio",
  39        .dev.bus = &vio_bus_type,
  40};
  41
  42#ifdef CONFIG_PPC_SMLPAR
  43/**
  44 * vio_cmo_pool - A pool of IO memory for CMO use
  45 *
  46 * @size: The size of the pool in bytes
  47 * @free: The amount of free memory in the pool
  48 */
  49struct vio_cmo_pool {
  50        size_t size;
  51        size_t free;
  52};
  53
  54/* How many ms to delay queued balance work */
  55#define VIO_CMO_BALANCE_DELAY 100
  56
  57/* Portion out IO memory to CMO devices by this chunk size */
  58#define VIO_CMO_BALANCE_CHUNK 131072
  59
  60/**
  61 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
  62 *
   64 * @viodev: struct vio_dev pointer
  64 * @list: pointer to other devices on bus that are being tracked
  65 */
  66struct vio_cmo_dev_entry {
  67        struct vio_dev *viodev;
  68        struct list_head list;
  69};
  70
  71/**
  72 * vio_cmo - VIO bus accounting structure for CMO entitlement
  73 *
  74 * @lock: spinlock for entire structure
  75 * @balance_q: work queue for balancing system entitlement
  76 * @device_list: list of CMO-enabled devices requiring entitlement
  77 * @entitled: total system entitlement in bytes
  78 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
  79 * @excess: pool of excess entitlement not needed for device reserves or spare
  80 * @spare: IO memory for device hotplug functionality
  81 * @min: minimum necessary for system operation
  82 * @desired: desired memory for system operation
  83 * @curr: bytes currently allocated
  84 * @high: high water mark for IO data usage
  85 */
  86static struct vio_cmo {
  87        spinlock_t lock;
  88        struct delayed_work balance_q;
  89        struct list_head device_list;
  90        size_t entitled;
  91        struct vio_cmo_pool reserve;
  92        struct vio_cmo_pool excess;
  93        size_t spare;
  94        size_t min;
  95        size_t desired;
  96        size_t curr;
  97        size_t high;
  98} vio_cmo;
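/*
 * Accounting note: the reserve and excess pools partition the total
 * entitlement (vio_cmo_balance() below re-establishes
 * reserve.size + excess.size == entitled), the spare is carved out of the
 * reserve pool, and curr/high track the bytes currently mapped and the
 * high-water mark of that usage.
 */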
  99
 100/**
  101 * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
 102 */
 103static int vio_cmo_num_OF_devs(void)
 104{
 105        struct device_node *node_vroot;
 106        int count = 0;
 107
 108        /*
 109         * Count the number of vdevice entries with an
 110         * ibm,my-dma-window OF property
 111         */
 112        node_vroot = of_find_node_by_name(NULL, "vdevice");
 113        if (node_vroot) {
 114                struct device_node *of_node;
 115                struct property *prop;
 116
 117                for_each_child_of_node(node_vroot, of_node) {
 118                        prop = of_find_property(of_node, "ibm,my-dma-window",
 119                                               NULL);
 120                        if (prop)
 121                                count++;
 122                }
 123        }
 124        of_node_put(node_vroot);
 125        return count;
 126}
 127
 128/**
  129 * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
 130 *
 131 * @viodev: VIO device requesting IO memory
 132 * @size: size of allocation requested
 133 *
 134 * Allocations come from memory reserved for the devices and any excess
 135 * IO memory available to all devices.  The spare pool used to service
 136 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
 137 * made available.
 138 *
 139 * Return codes:
 140 *  0 for successful allocation and -ENOMEM for a failure
 141 */
 142static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
 143{
 144        unsigned long flags;
 145        size_t reserve_free = 0;
 146        size_t excess_free = 0;
 147        int ret = -ENOMEM;
 148
 149        spin_lock_irqsave(&vio_cmo.lock, flags);
 150
 151        /* Determine the amount of free entitlement available in reserve */
 152        if (viodev->cmo.entitled > viodev->cmo.allocated)
 153                reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
 154
 155        /* If spare is not fulfilled, the excess pool can not be used. */
 156        if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
 157                excess_free = vio_cmo.excess.free;
 158
 159        /* The request can be satisfied */
 160        if ((reserve_free + excess_free) >= size) {
 161                vio_cmo.curr += size;
 162                if (vio_cmo.curr > vio_cmo.high)
 163                        vio_cmo.high = vio_cmo.curr;
 164                viodev->cmo.allocated += size;
 165                size -= min(reserve_free, size);
 166                vio_cmo.excess.free -= size;
 167                ret = 0;
 168        }
 169
 170        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 171        return ret;
 172}
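/*
 * Worked example for the charge above (hypothetical numbers, not from this
 * file): with 4KB of unused device entitlement (reserve_free) and 64KB in
 * the excess pool, a 16KB request is accepted, 4KB is covered by the
 * device's own entitlement and the remaining 12KB is drawn from
 * vio_cmo.excess.free.
 */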
 173
 174/**
  175 * vio_cmo_dealloc - deallocate IO memory from CMO-enabled devices
 176 * @viodev: VIO device freeing IO memory
 177 * @size: size of deallocation
 178 *
 179 * IO memory is freed by the device back to the correct memory pools.
 180 * The spare pool is replenished first from either memory pool, then
 181 * the reserve pool is used to reduce device entitlement, the excess
 182 * pool is used to increase the reserve pool toward the desired entitlement
 183 * target, and then the remaining memory is returned to the pools.
 184 *
 185 */
 186static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
 187{
 188        unsigned long flags;
 189        size_t spare_needed = 0;
 190        size_t excess_freed = 0;
 191        size_t reserve_freed = size;
 192        size_t tmp;
 193        int balance = 0;
 194
 195        spin_lock_irqsave(&vio_cmo.lock, flags);
 196        vio_cmo.curr -= size;
 197
 198        /* Amount of memory freed from the excess pool */
 199        if (viodev->cmo.allocated > viodev->cmo.entitled) {
 200                excess_freed = min(reserve_freed, (viodev->cmo.allocated -
 201                                                   viodev->cmo.entitled));
 202                reserve_freed -= excess_freed;
 203        }
 204
 205        /* Remove allocation from device */
 206        viodev->cmo.allocated -= (reserve_freed + excess_freed);
 207
 208        /* Spare is a subset of the reserve pool, replenish it first. */
 209        spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
 210
 211        /*
 212         * Replenish the spare in the reserve pool from the excess pool.
 213         * This moves entitlement into the reserve pool.
 214         */
 215        if (spare_needed && excess_freed) {
 216                tmp = min(excess_freed, spare_needed);
 217                vio_cmo.excess.size -= tmp;
 218                vio_cmo.reserve.size += tmp;
 219                vio_cmo.spare += tmp;
 220                excess_freed -= tmp;
 221                spare_needed -= tmp;
 222                balance = 1;
 223        }
 224
 225        /*
 226         * Replenish the spare in the reserve pool from the reserve pool.
 227         * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
 228         * if needed, and gives it to the spare pool. The amount of used
 229         * memory in this pool does not change.
 230         */
 231        if (spare_needed && reserve_freed) {
 232                tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
 233
 234                vio_cmo.spare += tmp;
 235                viodev->cmo.entitled -= tmp;
 236                reserve_freed -= tmp;
 237                spare_needed -= tmp;
 238                balance = 1;
 239        }
 240
 241        /*
 242         * Increase the reserve pool until the desired allocation is met.
 243         * Move an allocation freed from the excess pool into the reserve
 244         * pool and schedule a balance operation.
 245         */
 246        if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
 247                tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
 248
 249                vio_cmo.excess.size -= tmp;
 250                vio_cmo.reserve.size += tmp;
 251                excess_freed -= tmp;
 252                balance = 1;
 253        }
 254
  255        /* Return any remaining freed excess entitlement to the excess pool */
 256        if (excess_freed)
 257                vio_cmo.excess.free += excess_freed;
 258
 259        if (balance)
 260                schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
 261        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 262}
 263
 264/**
 265 * vio_cmo_entitlement_update - Manage system entitlement changes
 266 *
 267 * @new_entitlement: new system entitlement to attempt to accommodate
 268 *
 269 * Increases in entitlement will be used to fulfill the spare entitlement
 270 * and the rest is given to the excess pool.  Decreases, if they are
  271 * possible, come from the excess pool and from unused device entitlement.
 272 *
 273 * Returns: 0 on success, -ENOMEM when change can not be made
 274 */
 275int vio_cmo_entitlement_update(size_t new_entitlement)
 276{
 277        struct vio_dev *viodev;
 278        struct vio_cmo_dev_entry *dev_ent;
 279        unsigned long flags;
 280        size_t avail, delta, tmp;
 281
 282        spin_lock_irqsave(&vio_cmo.lock, flags);
 283
 284        /* Entitlement increases */
 285        if (new_entitlement > vio_cmo.entitled) {
 286                delta = new_entitlement - vio_cmo.entitled;
 287
 288                /* Fulfill spare allocation */
 289                if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
 290                        tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
 291                        vio_cmo.spare += tmp;
 292                        vio_cmo.reserve.size += tmp;
 293                        delta -= tmp;
 294                }
 295
 296                /* Remaining new allocation goes to the excess pool */
 297                vio_cmo.entitled += delta;
 298                vio_cmo.excess.size += delta;
 299                vio_cmo.excess.free += delta;
 300
 301                goto out;
 302        }
 303
 304        /* Entitlement decreases */
 305        delta = vio_cmo.entitled - new_entitlement;
 306        avail = vio_cmo.excess.free;
 307
 308        /*
 309         * Need to check how much unused entitlement each device can
 310         * sacrifice to fulfill entitlement change.
 311         */
 312        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 313                if (avail >= delta)
 314                        break;
 315
 316                viodev = dev_ent->viodev;
 317                if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
 318                    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
 319                                avail += viodev->cmo.entitled -
 320                                         max_t(size_t, viodev->cmo.allocated,
 321                                               VIO_CMO_MIN_ENT);
 322        }
 323
 324        if (delta <= avail) {
 325                vio_cmo.entitled -= delta;
 326
 327                /* Take entitlement from the excess pool first */
 328                tmp = min(vio_cmo.excess.free, delta);
 329                vio_cmo.excess.size -= tmp;
 330                vio_cmo.excess.free -= tmp;
 331                delta -= tmp;
 332
 333                /*
 334                 * Remove all but VIO_CMO_MIN_ENT bytes from devices
 335                 * until entitlement change is served
 336                 */
 337                list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 338                        if (!delta)
 339                                break;
 340
 341                        viodev = dev_ent->viodev;
 342                        tmp = 0;
 343                        if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
 344                            (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
 345                                tmp = viodev->cmo.entitled -
 346                                      max_t(size_t, viodev->cmo.allocated,
 347                                            VIO_CMO_MIN_ENT);
 348                        viodev->cmo.entitled -= min(tmp, delta);
 349                        delta -= min(tmp, delta);
 350                }
 351        } else {
 352                spin_unlock_irqrestore(&vio_cmo.lock, flags);
 353                return -ENOMEM;
 354        }
 355
 356out:
 357        schedule_delayed_work(&vio_cmo.balance_q, 0);
 358        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 359        return 0;
 360}
 361
 362/**
 363 * vio_cmo_balance - Balance entitlement among devices
 364 *
 365 * @work: work queue structure for this operation
 366 *
 367 * Any system entitlement above the minimum needed for devices, or
 368 * already allocated to devices, can be distributed to the devices.
 369 * The list of devices is iterated through to recalculate the desired
 370 * entitlement level and to determine how much entitlement above the
 371 * minimum entitlement is allocated to devices.
 372 *
 373 * Small chunks of the available entitlement are given to devices until
 374 * their requirements are fulfilled or there is no entitlement left to give.
 375 * Upon completion sizes of the reserve and excess pools are calculated.
 376 *
 377 * The system minimum entitlement level is also recalculated here.
 378 * Entitlement will be reserved for devices even after vio_bus_remove to
 379 * accommodate reloading the driver.  The OF tree is walked to count the
 380 * number of devices present and this will remove entitlement for devices
 381 * that have actually left the system after having vio_bus_remove called.
 382 */
 383static void vio_cmo_balance(struct work_struct *work)
 384{
 385        struct vio_cmo *cmo;
 386        struct vio_dev *viodev;
 387        struct vio_cmo_dev_entry *dev_ent;
 388        unsigned long flags;
 389        size_t avail = 0, level, chunk, need;
 390        int devcount = 0, fulfilled;
 391
 392        cmo = container_of(work, struct vio_cmo, balance_q.work);
 393
 394        spin_lock_irqsave(&vio_cmo.lock, flags);
 395
 396        /* Calculate minimum entitlement and fulfill spare */
 397        cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
 398        BUG_ON(cmo->min > cmo->entitled);
 399        cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
 400        cmo->min += cmo->spare;
 401        cmo->desired = cmo->min;
 402
 403        /*
 404         * Determine how much entitlement is available and reset device
 405         * entitlements
 406         */
 407        avail = cmo->entitled - cmo->spare;
 408        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 409                viodev = dev_ent->viodev;
 410                devcount++;
 411                viodev->cmo.entitled = VIO_CMO_MIN_ENT;
 412                cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
 413                avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
 414        }
 415
 416        /*
 417         * Having provided each device with the minimum entitlement, loop
 418         * over the devices portioning out the remaining entitlement
 419         * until there is nothing left.
 420         */
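        /*
         * Each pass raises 'level' by VIO_CMO_BALANCE_CHUNK and tops up
         * every device that still wants more toward that level, so the
         * remaining entitlement is handed out in 128KB rounds rather than
         * given wholesale to the first device on the list.
         */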
 421        level = VIO_CMO_MIN_ENT;
 422        while (avail) {
 423                fulfilled = 0;
 424                list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 425                        viodev = dev_ent->viodev;
 426
 427                        if (viodev->cmo.desired <= level) {
 428                                fulfilled++;
 429                                continue;
 430                        }
 431
 432                        /*
 433                         * Give the device up to VIO_CMO_BALANCE_CHUNK
 434                         * bytes of entitlement, but do not exceed the
 435                         * desired level of entitlement for the device.
 436                         */
 437                        chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
 438                        chunk = min(chunk, (viodev->cmo.desired -
 439                                            viodev->cmo.entitled));
 440                        viodev->cmo.entitled += chunk;
 441
 442                        /*
 443                         * If the memory for this entitlement increase was
 444                         * already allocated to the device it does not come
 445                         * from the available pool being portioned out.
 446                         */
 447                        need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
 448                               max(viodev->cmo.allocated, level);
 449                        avail -= need;
 450
 451                }
 452                if (fulfilled == devcount)
 453                        break;
 454                level += VIO_CMO_BALANCE_CHUNK;
 455        }
 456
 457        /* Calculate new reserve and excess pool sizes */
 458        cmo->reserve.size = cmo->min;
 459        cmo->excess.free = 0;
 460        cmo->excess.size = 0;
 461        need = 0;
 462        list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
 463                viodev = dev_ent->viodev;
  464                /* Calculate reserve size above the minimum entitlement */
 465                if (viodev->cmo.entitled)
 466                        cmo->reserve.size += (viodev->cmo.entitled -
 467                                              VIO_CMO_MIN_ENT);
  468                /* Calculate used excess entitlement */
 469                if (viodev->cmo.allocated > viodev->cmo.entitled)
 470                        need += viodev->cmo.allocated - viodev->cmo.entitled;
 471        }
 472        cmo->excess.size = cmo->entitled - cmo->reserve.size;
 473        cmo->excess.free = cmo->excess.size - need;
 474
 475        cancel_delayed_work(to_delayed_work(work));
 476        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 477}
 478
 479static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
 480                                          dma_addr_t *dma_handle, gfp_t flag,
 481                                          unsigned long attrs)
 482{
 483        struct vio_dev *viodev = to_vio_dev(dev);
 484        void *ret;
 485
 486        if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
 487                atomic_inc(&viodev->cmo.allocs_failed);
 488                return NULL;
 489        }
 490
 491        ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
 492                                    dma_handle, dev->coherent_dma_mask, flag,
 493                                    dev_to_node(dev));
 494        if (unlikely(ret == NULL)) {
 495                vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
 496                atomic_inc(&viodev->cmo.allocs_failed);
 497        }
 498
 499        return ret;
 500}
 501
 502static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
 503                                        void *vaddr, dma_addr_t dma_handle,
 504                                        unsigned long attrs)
 505{
 506        struct vio_dev *viodev = to_vio_dev(dev);
 507
 508        iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
 509        vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
 510}
 511
 512static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
 513                                         unsigned long offset, size_t size,
 514                                         enum dma_data_direction direction,
 515                                         unsigned long attrs)
 516{
 517        struct vio_dev *viodev = to_vio_dev(dev);
 518        struct iommu_table *tbl = get_iommu_table_base(dev);
 519        dma_addr_t ret = DMA_MAPPING_ERROR;
 520
 521        if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))))
 522                goto out_fail;
 523        ret = iommu_map_page(dev, tbl, page, offset, size, dma_get_mask(dev),
 524                        direction, attrs);
 525        if (unlikely(ret == DMA_MAPPING_ERROR))
 526                goto out_deallocate;
 527        return ret;
 528
 529out_deallocate:
 530        vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
 531out_fail:
 532        atomic_inc(&viodev->cmo.allocs_failed);
 533        return DMA_MAPPING_ERROR;
 534}
 535
 536static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
 537                                     size_t size,
 538                                     enum dma_data_direction direction,
 539                                     unsigned long attrs)
 540{
 541        struct vio_dev *viodev = to_vio_dev(dev);
 542        struct iommu_table *tbl = get_iommu_table_base(dev);
 543
 544        iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
 545        vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
 546}
 547
 548static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 549                                int nelems, enum dma_data_direction direction,
 550                                unsigned long attrs)
 551{
 552        struct vio_dev *viodev = to_vio_dev(dev);
 553        struct iommu_table *tbl = get_iommu_table_base(dev);
 554        struct scatterlist *sgl;
 555        int ret, count;
 556        size_t alloc_size = 0;
 557
 558        for_each_sg(sglist, sgl, nelems, count)
 559                alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
 560
 561        if (vio_cmo_alloc(viodev, alloc_size))
 562                goto out_fail;
 563        ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev),
 564                        direction, attrs);
 565        if (unlikely(!ret))
 566                goto out_deallocate;
 567
 568        for_each_sg(sglist, sgl, ret, count)
 569                alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
 570        if (alloc_size)
 571                vio_cmo_dealloc(viodev, alloc_size);
 572        return ret;
 573
 574out_deallocate:
 575        vio_cmo_dealloc(viodev, alloc_size);
 576out_fail:
 577        atomic_inc(&viodev->cmo.allocs_failed);
 578        return 0;
 579}
 580
 581static void vio_dma_iommu_unmap_sg(struct device *dev,
 582                struct scatterlist *sglist, int nelems,
 583                enum dma_data_direction direction,
 584                unsigned long attrs)
 585{
 586        struct vio_dev *viodev = to_vio_dev(dev);
 587        struct iommu_table *tbl = get_iommu_table_base(dev);
 588        struct scatterlist *sgl;
 589        size_t alloc_size = 0;
 590        int count;
 591
 592        for_each_sg(sglist, sgl, nelems, count)
 593                alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
 594
 595        ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
 596        vio_cmo_dealloc(viodev, alloc_size);
 597}
 598
 599static const struct dma_map_ops vio_dma_mapping_ops = {
 600        .alloc             = vio_dma_iommu_alloc_coherent,
 601        .free              = vio_dma_iommu_free_coherent,
 602        .map_sg            = vio_dma_iommu_map_sg,
 603        .unmap_sg          = vio_dma_iommu_unmap_sg,
 604        .map_page          = vio_dma_iommu_map_page,
 605        .unmap_page        = vio_dma_iommu_unmap_page,
 606        .dma_supported     = dma_iommu_dma_supported,
 607        .get_required_mask = dma_iommu_get_required_mask,
 608        .mmap              = dma_common_mmap,
 609        .get_sgtable       = dma_common_get_sgtable,
 610};
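/*
 * The ops above wrap the standard IOMMU DMA path with CMO accounting: every
 * map charges vio_cmo_alloc() for the request rounded up to page size
 * (PAGE_SIZE for coherent allocations, IOMMU_PAGE_SIZE for mappings), every
 * unmap credits the same amount back through vio_cmo_dealloc(), and a
 * failed charge bumps cmo.allocs_failed.
 */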
 611
 612/**
 613 * vio_cmo_set_dev_desired - Set desired entitlement for a device
 614 *
 615 * @viodev: struct vio_dev for device to alter
 616 * @desired: new desired entitlement level in bytes
 617 *
 618 * For use by devices to request a change to their entitlement at runtime or
 619 * through sysfs.  The desired entitlement level is changed and a balancing
 620 * of system resources is scheduled to run in the future.
 621 */
 622void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
 623{
 624        unsigned long flags;
 625        struct vio_cmo_dev_entry *dev_ent;
 626        int found = 0;
 627
 628        if (!firmware_has_feature(FW_FEATURE_CMO))
 629                return;
 630
 631        spin_lock_irqsave(&vio_cmo.lock, flags);
 632        if (desired < VIO_CMO_MIN_ENT)
 633                desired = VIO_CMO_MIN_ENT;
 634
 635        /*
  636         * Changes will not be made for devices not in the device list;
  637         * if a device is not in the list, no driver is loaded for it
  638         * and it cannot receive entitlement.
 639         */
 640        list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
 641                if (viodev == dev_ent->viodev) {
 642                        found = 1;
 643                        break;
 644                }
 645        if (!found) {
 646                spin_unlock_irqrestore(&vio_cmo.lock, flags);
 647                return;
 648        }
 649
 650        /* Increase/decrease in desired device entitlement */
 651        if (desired >= viodev->cmo.desired) {
  652                /* Just bump the bus and device values prior to a balance */
 653                vio_cmo.desired += desired - viodev->cmo.desired;
 654                viodev->cmo.desired = desired;
 655        } else {
 656                /* Decrease bus and device values for desired entitlement */
 657                vio_cmo.desired -= viodev->cmo.desired - desired;
 658                viodev->cmo.desired = desired;
 659                /*
 660                 * If less entitlement is desired than current entitlement, move
 661                 * any reserve memory in the change region to the excess pool.
 662                 */
 663                if (viodev->cmo.entitled > desired) {
 664                        vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
 665                        vio_cmo.excess.size += viodev->cmo.entitled - desired;
 666                        /*
 667                         * If entitlement moving from the reserve pool to the
 668                         * excess pool is currently unused, add to the excess
 669                         * free counter.
 670                         */
 671                        if (viodev->cmo.allocated < viodev->cmo.entitled)
 672                                vio_cmo.excess.free += viodev->cmo.entitled -
 673                                                       max(viodev->cmo.allocated, desired);
 674                        viodev->cmo.entitled = desired;
 675                }
 676        }
 677        schedule_delayed_work(&vio_cmo.balance_q, 0);
 678        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 679}
 680
 681/**
 682 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
 683 *
  684 * @viodev: Pointer to struct vio_dev for device
 685 *
  686 * Determine the device's IO memory entitlement needs, attempting
  687 * to satisfy the system minimum entitlement first and scheduling
 688 * a balance operation to take care of the rest at a later time.
 689 *
 690 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
 691 *          -ENOMEM when entitlement is not available for device or
 692 *          device entry.
 693 *
 694 */
 695static int vio_cmo_bus_probe(struct vio_dev *viodev)
 696{
 697        struct vio_cmo_dev_entry *dev_ent;
 698        struct device *dev = &viodev->dev;
 699        struct iommu_table *tbl;
 700        struct vio_driver *viodrv = to_vio_driver(dev->driver);
 701        unsigned long flags;
 702        size_t size;
 703        bool dma_capable = false;
 704
 705        tbl = get_iommu_table_base(dev);
 706
 707        /* A device requires entitlement if it has a DMA window property */
 708        switch (viodev->family) {
 709        case VDEVICE:
 710                if (of_get_property(viodev->dev.of_node,
 711                                        "ibm,my-dma-window", NULL))
 712                        dma_capable = true;
 713                break;
 714        case PFO:
 715                dma_capable = false;
 716                break;
 717        default:
 718                dev_warn(dev, "unknown device family: %d\n", viodev->family);
 719                BUG();
 720                break;
 721        }
 722
 723        /* Configure entitlement for the device. */
 724        if (dma_capable) {
 725                /* Check that the driver is CMO enabled and get desired DMA */
 726                if (!viodrv->get_desired_dma) {
 727                        dev_err(dev, "%s: device driver does not support CMO\n",
 728                                __func__);
 729                        return -EINVAL;
 730                }
 731
 732                viodev->cmo.desired =
 733                        IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
 734                if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
 735                        viodev->cmo.desired = VIO_CMO_MIN_ENT;
 736                size = VIO_CMO_MIN_ENT;
 737
 738                dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
 739                                  GFP_KERNEL);
 740                if (!dev_ent)
 741                        return -ENOMEM;
 742
 743                dev_ent->viodev = viodev;
 744                spin_lock_irqsave(&vio_cmo.lock, flags);
 745                list_add(&dev_ent->list, &vio_cmo.device_list);
 746        } else {
 747                viodev->cmo.desired = 0;
 748                size = 0;
 749                spin_lock_irqsave(&vio_cmo.lock, flags);
 750        }
 751
 752        /*
 753         * If the needs for vio_cmo.min have not changed since they
 754         * were last set, the number of devices in the OF tree has
 755         * been constant and the IO memory for this is already in
 756         * the reserve pool.
 757         */
 758        if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
 759                            VIO_CMO_MIN_ENT)) {
  760                /* Update desired entitlement if device requires it */
 761                if (size)
 762                        vio_cmo.desired += (viodev->cmo.desired -
 763                                        VIO_CMO_MIN_ENT);
 764        } else {
 765                size_t tmp;
 766
 767                tmp = vio_cmo.spare + vio_cmo.excess.free;
 768                if (tmp < size) {
 769                        dev_err(dev, "%s: insufficient free "
 770                                "entitlement to add device. "
 771                                "Need %lu, have %lu\n", __func__,
  772                                size, tmp);
 773                        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 774                        return -ENOMEM;
 775                }
 776
 777                /* Use excess pool first to fulfill request */
 778                tmp = min(size, vio_cmo.excess.free);
 779                vio_cmo.excess.free -= tmp;
 780                vio_cmo.excess.size -= tmp;
 781                vio_cmo.reserve.size += tmp;
 782
 783                /* Use spare if excess pool was insufficient */
 784                vio_cmo.spare -= size - tmp;
 785
 786                /* Update bus accounting */
 787                vio_cmo.min += size;
 788                vio_cmo.desired += viodev->cmo.desired;
 789        }
 790        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 791        return 0;
 792}
 793
 794/**
 795 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
 796 *
  797 * @viodev: Pointer to struct vio_dev for device
 798 *
 799 * Remove the device from the cmo device list.  The minimum entitlement
 800 * will be reserved for the device as long as it is in the system.  The
 801 * rest of the entitlement the device had been allocated will be returned
 802 * to the system.
 803 */
 804static void vio_cmo_bus_remove(struct vio_dev *viodev)
 805{
 806        struct vio_cmo_dev_entry *dev_ent;
 807        unsigned long flags;
 808        size_t tmp;
 809
 810        spin_lock_irqsave(&vio_cmo.lock, flags);
 811        if (viodev->cmo.allocated) {
 812                dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
 813                        "allocated after remove operation.\n",
 814                        __func__, viodev->cmo.allocated);
 815                BUG();
 816        }
 817
 818        /*
 819         * Remove the device from the device list being maintained for
 820         * CMO enabled devices.
 821         */
 822        list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
 823                if (viodev == dev_ent->viodev) {
 824                        list_del(&dev_ent->list);
 825                        kfree(dev_ent);
 826                        break;
 827                }
 828
 829        /*
 830         * Devices may not require any entitlement and they do not need
 831         * to be processed.  Otherwise, return the device's entitlement
 832         * back to the pools.
 833         */
 834        if (viodev->cmo.entitled) {
 835                /*
  836                 * This device has not yet left the OF tree; its
 837                 * minimum entitlement remains in vio_cmo.min and
 838                 * vio_cmo.desired
 839                 */
 840                vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
 841
 842                /*
 843                 * Save min allocation for device in reserve as long
 844                 * as it exists in OF tree as determined by later
 845                 * balance operation
 846                 */
 847                viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
 848
 849                /* Replenish spare from freed reserve pool */
 850                if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
 851                        tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
 852                                                         vio_cmo.spare));
 853                        vio_cmo.spare += tmp;
 854                        viodev->cmo.entitled -= tmp;
 855                }
 856
 857                /* Remaining reserve goes to excess pool */
 858                vio_cmo.excess.size += viodev->cmo.entitled;
 859                vio_cmo.excess.free += viodev->cmo.entitled;
 860                vio_cmo.reserve.size -= viodev->cmo.entitled;
 861
 862                /*
  863                 * Until the device is removed it will keep a
  864                 * minimum entitlement; this guarantees that a
  865                 * module unload/load cycle will succeed.
 866                 */
 867                viodev->cmo.entitled = VIO_CMO_MIN_ENT;
 868                viodev->cmo.desired = VIO_CMO_MIN_ENT;
 869                atomic_set(&viodev->cmo.allocs_failed, 0);
 870        }
 871
 872        spin_unlock_irqrestore(&vio_cmo.lock, flags);
 873}
 874
 875static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
 876{
 877        set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
 878}
 879
 880/**
 881 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
 882 *
 883 * Set up the reserve and excess entitlement pools based on available
 884 * system entitlement and the number of devices in the OF tree that
 885 * require entitlement in the reserve pool.
 886 */
 887static void vio_cmo_bus_init(void)
 888{
 889        struct hvcall_mpp_data mpp_data;
 890        int err;
 891
 892        memset(&vio_cmo, 0, sizeof(struct vio_cmo));
 893        spin_lock_init(&vio_cmo.lock);
 894        INIT_LIST_HEAD(&vio_cmo.device_list);
 895        INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
 896
 897        /* Get current system entitlement */
 898        err = h_get_mpp(&mpp_data);
 899
 900        /*
 901         * On failure, continue with entitlement set to 0, will panic()
 902         * later when spare is reserved.
 903         */
 904        if (err != H_SUCCESS) {
  905                printk(KERN_ERR "%s: unable to determine system IO "
 906                       "entitlement. (%d)\n", __func__, err);
 907                vio_cmo.entitled = 0;
 908        } else {
 909                vio_cmo.entitled = mpp_data.entitled_mem;
 910        }
 911
 912        /* Set reservation and check against entitlement */
 913        vio_cmo.spare = VIO_CMO_MIN_ENT;
 914        vio_cmo.reserve.size = vio_cmo.spare;
 915        vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
 916                                 VIO_CMO_MIN_ENT);
 917        if (vio_cmo.reserve.size > vio_cmo.entitled) {
 918                printk(KERN_ERR "%s: insufficient system entitlement\n",
 919                       __func__);
 920                panic("%s: Insufficient system entitlement", __func__);
 921        }
 922
 923        /* Set the remaining accounting variables */
 924        vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
 925        vio_cmo.excess.free = vio_cmo.excess.size;
 926        vio_cmo.min = vio_cmo.reserve.size;
 927        vio_cmo.desired = vio_cmo.reserve.size;
 928}
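/*
 * Note on the sizing above: the initial reserve pool is one VIO_CMO_MIN_ENT
 * per vdevice node that has a DMA window, plus one spare VIO_CMO_MIN_ENT
 * for hotplug.  If the entitlement reported by h_get_mpp() is smaller than
 * that, CMO cannot operate and the system panics.
 */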
 929
 930/* sysfs device functions and data structures for CMO */
 931
 932#define viodev_cmo_rd_attr(name)                                        \
 933static ssize_t cmo_##name##_show(struct device *dev,                    \
 934                                        struct device_attribute *attr,  \
 935                                         char *buf)                     \
 936{                                                                       \
 937        return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
 938}
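/*
 * The macro above generates the cmo_<name>_show() helpers for the entitled,
 * allocated and desired counters; together with the explicit allocs_failed
 * show/store routines below they back the DEVICE_ATTR_RO/RW declarations
 * that expose these values per device in sysfs.
 */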
 939
 940static ssize_t cmo_allocs_failed_show(struct device *dev,
 941                struct device_attribute *attr, char *buf)
 942{
 943        struct vio_dev *viodev = to_vio_dev(dev);
 944        return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
 945}
 946
 947static ssize_t cmo_allocs_failed_store(struct device *dev,
 948                struct device_attribute *attr, const char *buf, size_t count)
 949{
 950        struct vio_dev *viodev = to_vio_dev(dev);
 951        atomic_set(&viodev->cmo.allocs_failed, 0);
 952        return count;
 953}
 954
 955static ssize_t cmo_desired_store(struct device *dev,
 956                struct device_attribute *attr, const char *buf, size_t count)
 957{
 958        struct vio_dev *viodev = to_vio_dev(dev);
 959        size_t new_desired;
 960        int ret;
 961
 962        ret = kstrtoul(buf, 10, &new_desired);
 963        if (ret)
 964                return ret;
 965
 966        vio_cmo_set_dev_desired(viodev, new_desired);
 967        return count;
 968}
 969
 970viodev_cmo_rd_attr(desired);
 971viodev_cmo_rd_attr(entitled);
 972viodev_cmo_rd_attr(allocated);
 973
 974static ssize_t name_show(struct device *, struct device_attribute *, char *);
 975static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
 976static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 977                             char *buf);
 978
 979static struct device_attribute dev_attr_name;
 980static struct device_attribute dev_attr_devspec;
 981static struct device_attribute dev_attr_modalias;
 982
 983static DEVICE_ATTR_RO(cmo_entitled);
 984static DEVICE_ATTR_RO(cmo_allocated);
 985static DEVICE_ATTR_RW(cmo_desired);
 986static DEVICE_ATTR_RW(cmo_allocs_failed);
 987
 988static struct attribute *vio_cmo_dev_attrs[] = {
 989        &dev_attr_name.attr,
 990        &dev_attr_devspec.attr,
 991        &dev_attr_modalias.attr,
 992        &dev_attr_cmo_entitled.attr,
 993        &dev_attr_cmo_allocated.attr,
 994        &dev_attr_cmo_desired.attr,
 995        &dev_attr_cmo_allocs_failed.attr,
 996        NULL,
 997};
 998ATTRIBUTE_GROUPS(vio_cmo_dev);
 999
1000/* sysfs bus functions and data structures for CMO */
1001
1002#define viobus_cmo_rd_attr(name)                                        \
1003static ssize_t cmo_bus_##name##_show(struct bus_type *bt, char *buf)    \
1004{                                                                       \
1005        return sprintf(buf, "%lu\n", vio_cmo.name);                     \
1006}                                                                       \
1007static struct bus_attribute bus_attr_cmo_bus_##name =                   \
1008        __ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
1009
1010#define viobus_cmo_pool_rd_attr(name, var)                              \
1011static ssize_t                                                          \
1012cmo_##name##_##var##_show(struct bus_type *bt, char *buf)               \
1013{                                                                       \
1014        return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
1015}                                                                       \
1016static BUS_ATTR_RO(cmo_##name##_##var)
1017
1018viobus_cmo_rd_attr(entitled);
1019viobus_cmo_rd_attr(spare);
1020viobus_cmo_rd_attr(min);
1021viobus_cmo_rd_attr(desired);
1022viobus_cmo_rd_attr(curr);
1023viobus_cmo_pool_rd_attr(reserve, size);
1024viobus_cmo_pool_rd_attr(excess, size);
1025viobus_cmo_pool_rd_attr(excess, free);
1026
1027static ssize_t cmo_high_show(struct bus_type *bt, char *buf)
1028{
1029        return sprintf(buf, "%lu\n", vio_cmo.high);
1030}
1031
1032static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
1033                              size_t count)
1034{
1035        unsigned long flags;
1036
1037        spin_lock_irqsave(&vio_cmo.lock, flags);
1038        vio_cmo.high = vio_cmo.curr;
1039        spin_unlock_irqrestore(&vio_cmo.lock, flags);
1040
1041        return count;
1042}
1043static BUS_ATTR_RW(cmo_high);
1044
1045static struct attribute *vio_bus_attrs[] = {
1046        &bus_attr_cmo_bus_entitled.attr,
1047        &bus_attr_cmo_bus_spare.attr,
1048        &bus_attr_cmo_bus_min.attr,
1049        &bus_attr_cmo_bus_desired.attr,
1050        &bus_attr_cmo_bus_curr.attr,
1051        &bus_attr_cmo_high.attr,
1052        &bus_attr_cmo_reserve_size.attr,
1053        &bus_attr_cmo_excess_size.attr,
1054        &bus_attr_cmo_excess_free.attr,
1055        NULL,
1056};
1057ATTRIBUTE_GROUPS(vio_bus);
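/*
 * Once vio_cmo_sysfs_init() below installs these groups, the bus-wide
 * counters show up under /sys/bus/vio/ (cmo_entitled, cmo_spare, cmo_min,
 * cmo_desired, cmo_curr, cmo_high, cmo_reserve_size, cmo_excess_size,
 * cmo_excess_free) and the per-device attributes under each device's sysfs
 * directory; a write to a device's cmo_desired lands in
 * vio_cmo_set_dev_desired().
 */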
1058
1059static void vio_cmo_sysfs_init(void)
1060{
1061        vio_bus_type.dev_groups = vio_cmo_dev_groups;
1062        vio_bus_type.bus_groups = vio_bus_groups;
1063}
1064#else /* CONFIG_PPC_SMLPAR */
1065int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
1066void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
1067static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
1068static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
1069static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
1070static void vio_cmo_bus_init(void) {}
1071static void vio_cmo_sysfs_init(void) { }
1072#endif /* CONFIG_PPC_SMLPAR */
1073EXPORT_SYMBOL(vio_cmo_entitlement_update);
1074EXPORT_SYMBOL(vio_cmo_set_dev_desired);
1075
1076
1077/*
1078 * Platform Facilities Option (PFO) support
1079 */
1080
1081/**
1082 * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
1083 *
 1084 * @vdev: Pointer to a struct vio_dev for device
 1085 * @op: Pointer to a struct vio_pfo_op for the operation parameters
1086 *
1087 * Calls the hypervisor to synchronously perform the PFO operation
1088 * described in @op.  In the case of a busy response from the hypervisor,
1089 * the operation will be re-submitted indefinitely unless a non-zero timeout
1090 * is specified or an error occurs. The timeout places a limit on when to
 1091 * stop re-submitting an operation; the total time can be exceeded if an
1092 * operation is in progress.
1093 *
 1094 * op->hcall_err will be set to the return value from the last H_COP call,
 1095 * or to 0 if an error not involving the h_call
 1096 * was encountered.
1097 *
1098 * Returns:
1099 *      0 on success,
1100 *      -EINVAL if the h_call fails due to an invalid parameter,
1101 *      -E2BIG if the h_call can not be performed synchronously,
1102 *      -EBUSY if a timeout is specified and has elapsed,
1103 *      -EACCES if the memory area for data/status has been rescinded, or
1104 *      -EPERM if a hardware fault has been indicated
1105 */
1106int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
1107{
1108        struct device *dev = &vdev->dev;
1109        unsigned long deadline = 0;
1110        long hret = 0;
1111        int ret = 0;
1112
1113        if (op->timeout)
1114                deadline = jiffies + msecs_to_jiffies(op->timeout);
1115
1116        while (true) {
1117                hret = plpar_hcall_norets(H_COP, op->flags,
1118                                vdev->resource_id,
1119                                op->in, op->inlen, op->out,
1120                                op->outlen, op->csbcpb);
1121
1122                if (hret == H_SUCCESS ||
1123                    (hret != H_NOT_ENOUGH_RESOURCES &&
1124                     hret != H_BUSY && hret != H_RESOURCE) ||
 1125                    (op->timeout && time_after(jiffies, deadline)))
1126                        break;
1127
1128                dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
1129        }
1130
1131        switch (hret) {
1132        case H_SUCCESS:
1133                ret = 0;
1134                break;
1135        case H_OP_MODE:
1136        case H_TOO_BIG:
1137                ret = -E2BIG;
1138                break;
1139        case H_RESCINDED:
1140                ret = -EACCES;
1141                break;
1142        case H_HARDWARE:
1143                ret = -EPERM;
1144                break;
1145        case H_NOT_ENOUGH_RESOURCES:
1146        case H_RESOURCE:
1147        case H_BUSY:
1148                ret = -EBUSY;
1149                break;
1150        default:
1151                ret = -EINVAL;
1152                break;
1153        }
1154
1155        if (ret)
1156                dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
1157                                __func__, ret, hret);
1158
1159        op->hcall_err = hret;
1160        return ret;
1161}
1162EXPORT_SYMBOL(vio_h_cop_sync);
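/*
 * Illustrative caller sketch (hypothetical names, not part of this file):
 * a PFO driver fills a struct vio_pfo_op with the addresses and lengths of
 * its request, response and CSB/CPB buffers and submits it synchronously:
 *
 *        struct vio_pfo_op op = {
 *                .flags   = 0,
 *                .in      = in_paddr,   .inlen  = in_len,
 *                .out     = out_paddr,  .outlen = out_len,
 *                .csbcpb  = csbcpb_paddr,
 *                .timeout = 2000,
 *        };
 *        rc = vio_h_cop_sync(vdev, &op);
 *
 * op.timeout is in milliseconds and bounds the H_BUSY retries; on failure
 * op.hcall_err carries the raw hcall return code.  in_paddr, out_paddr,
 * csbcpb_paddr and the lengths are placeholders for real buffer addresses.
 */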
1163
1164static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
1165{
1166        const __be32 *dma_window;
1167        struct iommu_table *tbl;
1168        unsigned long offset, size;
1169
1170        dma_window = of_get_property(dev->dev.of_node,
1171                                  "ibm,my-dma-window", NULL);
1172        if (!dma_window)
1173                return NULL;
1174
1175        tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
1176        if (tbl == NULL)
1177                return NULL;
1178
1179        of_parse_dma_window(dev->dev.of_node, dma_window,
1180                            &tbl->it_index, &offset, &size);
1181
1182        /* TCE table size - measured in tce entries */
1183        tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
1184        tbl->it_size = size >> tbl->it_page_shift;
1185        /* offset for VIO should always be 0 */
1186        tbl->it_offset = offset >> tbl->it_page_shift;
1187        tbl->it_busno = 0;
1188        tbl->it_type = TCE_VB;
1189        tbl->it_blocksize = 16;
1190
1191        if (firmware_has_feature(FW_FEATURE_LPAR))
1192                tbl->it_ops = &iommu_table_lpar_multi_ops;
1193        else
1194                tbl->it_ops = &iommu_table_pseries_ops;
1195
1196        return iommu_init_table(tbl, -1, 0, 0);
1197}
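/*
 * The "ibm,my-dma-window" property parsed above provides the window
 * identifier stored in it_index (the LIOBN) plus the offset and size of the
 * device's TCE window; the table is set up with 4K IOMMU pages and its TCE
 * ops are chosen according to whether we are running under an LPAR.
 */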
1198
1199/**
1200 * vio_match_device: - Tell if a VIO device has a matching
1201 *                      VIO device id structure.
1202 * @ids:        array of VIO device id structures to search in
1203 * @dev:        the VIO device structure to match against
1204 *
1205 * Used by a driver to check whether a VIO device present in the
1206 * system is in its list of supported devices. Returns the matching
1207 * vio_device_id structure or NULL if there is no match.
1208 */
1209static const struct vio_device_id *vio_match_device(
1210                const struct vio_device_id *ids, const struct vio_dev *dev)
1211{
1212        while (ids->type[0] != '\0') {
1213                if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
1214                    of_device_is_compatible(dev->dev.of_node,
1215                                         ids->compat))
1216                        return ids;
1217                ids++;
1218        }
1219        return NULL;
1220}
1221
1222/*
1223 * Convert from struct device to struct vio_dev and pass to driver.
1224 * dev->driver has already been set by generic code because vio_bus_match
1225 * succeeded.
1226 */
1227static int vio_bus_probe(struct device *dev)
1228{
1229        struct vio_dev *viodev = to_vio_dev(dev);
1230        struct vio_driver *viodrv = to_vio_driver(dev->driver);
1231        const struct vio_device_id *id;
1232        int error = -ENODEV;
1233
1234        if (!viodrv->probe)
1235                return error;
1236
1237        id = vio_match_device(viodrv->id_table, viodev);
1238        if (id) {
1239                memset(&viodev->cmo, 0, sizeof(viodev->cmo));
1240                if (firmware_has_feature(FW_FEATURE_CMO)) {
1241                        error = vio_cmo_bus_probe(viodev);
1242                        if (error)
1243                                return error;
1244                }
1245                error = viodrv->probe(viodev, id);
1246                if (error && firmware_has_feature(FW_FEATURE_CMO))
1247                        vio_cmo_bus_remove(viodev);
1248        }
1249
1250        return error;
1251}
1252
1253/* convert from struct device to struct vio_dev and pass to driver. */
1254static int vio_bus_remove(struct device *dev)
1255{
1256        struct vio_dev *viodev = to_vio_dev(dev);
1257        struct vio_driver *viodrv = to_vio_driver(dev->driver);
1258        struct device *devptr;
1259        int ret = 1;
1260
1261        /*
1262         * Hold a reference to the device after the remove function is called
1263         * to allow for CMO accounting cleanup for the device.
1264         */
1265        devptr = get_device(dev);
1266
1267        if (viodrv->remove)
1268                ret = viodrv->remove(viodev);
1269
1270        if (!ret && firmware_has_feature(FW_FEATURE_CMO))
1271                vio_cmo_bus_remove(viodev);
1272
1273        put_device(devptr);
1274        return ret;
1275}
1276
1277/**
1278 * vio_register_driver: - Register a new vio driver
1279 * @viodrv:     The vio_driver structure to be registered.
1280 */
1281int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
1282                          const char *mod_name)
1283{
1284        pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
1285
1286        /* fill in 'struct driver' fields */
1287        viodrv->driver.name = viodrv->name;
1288        viodrv->driver.pm = viodrv->pm;
1289        viodrv->driver.bus = &vio_bus_type;
1290        viodrv->driver.owner = owner;
1291        viodrv->driver.mod_name = mod_name;
1292
1293        return driver_register(&viodrv->driver);
1294}
1295EXPORT_SYMBOL(__vio_register_driver);
1296
1297/**
1298 * vio_unregister_driver - Remove registration of vio driver.
 1299 * @viodrv:     The vio_driver struct to be removed from registration
1300 */
1301void vio_unregister_driver(struct vio_driver *viodrv)
1302{
1303        driver_unregister(&viodrv->driver);
1304}
1305EXPORT_SYMBOL(vio_unregister_driver);
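/*
 * Illustrative driver skeleton (hypothetical names, not part of this file)
 * showing how the registration interface above is typically used:
 *
 *        static const struct vio_device_id foo_ids[] = {
 *                { "block", "IBM,foo-adapter" },
 *                { "", "" },
 *        };
 *        MODULE_DEVICE_TABLE(vio, foo_ids);
 *
 *        static struct vio_driver foo_driver = {
 *                .name            = "foo",
 *                .id_table        = foo_ids,
 *                .probe           = foo_probe,
 *                .remove          = foo_remove,
 *                .get_desired_dma = foo_get_desired_dma,
 *        };
 *
 *        rc = vio_register_driver(&foo_driver);
 *
 * get_desired_dma() is required for DMA-capable devices when CMO is active
 * (see vio_cmo_bus_probe() above).  vio_match_device() compares the id
 * table against each device's type and "compatible" property, and
 * vio_bus_probe() then calls foo_probe() for every match.
 */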
1306
1307/* vio_dev refcount hit 0 */
1308static void vio_dev_release(struct device *dev)
1309{
1310        struct iommu_table *tbl = get_iommu_table_base(dev);
1311
1312        if (tbl)
1313                iommu_tce_table_put(tbl);
1314        of_node_put(dev->of_node);
1315        kfree(to_vio_dev(dev));
1316}
1317
1318/**
1319 * vio_register_device_node: - Register a new vio device.
1320 * @of_node:    The OF node for this device.
1321 *
1322 * Creates and initializes a vio_dev structure from the data in
1323 * of_node and adds it to the list of virtual devices.
1324 * Returns a pointer to the created vio_dev or NULL if node has
1325 * NULL device_type or compatible fields.
1326 */
1327struct vio_dev *vio_register_device_node(struct device_node *of_node)
1328{
1329        struct vio_dev *viodev;
1330        struct device_node *parent_node;
1331        const __be32 *prop;
1332        enum vio_dev_family family;
1333
1334        /*
 1335         * Determine if this node is under the /vdevice node or under the
1336         * /ibm,platform-facilities node.  This decides the device's family.
1337         */
1338        parent_node = of_get_parent(of_node);
1339        if (parent_node) {
1340                if (of_node_is_type(parent_node, "ibm,platform-facilities"))
1341                        family = PFO;
1342                else if (of_node_is_type(parent_node, "vdevice"))
1343                        family = VDEVICE;
1344                else {
1345                        pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
1346                                        __func__,
1347                                        parent_node,
1348                                        of_node);
1349                        of_node_put(parent_node);
1350                        return NULL;
1351                }
1352                of_node_put(parent_node);
1353        } else {
1354                pr_warn("%s: could not determine the parent of node %pOFn.\n",
1355                                __func__, of_node);
1356                return NULL;
1357        }
1358
1359        if (family == PFO) {
1360                if (of_get_property(of_node, "interrupt-controller", NULL)) {
1361                        pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
1362                                        __func__, of_node);
1363                        return NULL;
1364                }
1365        }
1366
1367        /* allocate a vio_dev for this node */
1368        viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
1369        if (viodev == NULL) {
1370                pr_warn("%s: allocation failure for VIO device.\n", __func__);
1371                return NULL;
1372        }
1373
1374        /* we need the 'device_type' property, in order to match with drivers */
1375        viodev->family = family;
1376        if (viodev->family == VDEVICE) {
1377                unsigned int unit_address;
1378
1379                viodev->type = of_node_get_device_type(of_node);
1380                if (!viodev->type) {
1381                        pr_warn("%s: node %pOFn is missing the 'device_type' "
1382                                        "property.\n", __func__, of_node);
1383                        goto out;
1384                }
1385
1386                prop = of_get_property(of_node, "reg", NULL);
1387                if (prop == NULL) {
1388                        pr_warn("%s: node %pOFn missing 'reg'\n",
1389                                        __func__, of_node);
1390                        goto out;
1391                }
1392                unit_address = of_read_number(prop, 1);
1393                dev_set_name(&viodev->dev, "%x", unit_address);
1394                viodev->irq = irq_of_parse_and_map(of_node, 0);
1395                viodev->unit_address = unit_address;
1396        } else {
 1397                /* PFO devices need their resource_id for submitting COP_OPs.
1398                 * This is an optional field for devices, but is required when
1399                 * performing synchronous ops */
1400                prop = of_get_property(of_node, "ibm,resource-id", NULL);
1401                if (prop != NULL)
1402                        viodev->resource_id = of_read_number(prop, 1);
1403
1404                dev_set_name(&viodev->dev, "%pOFn", of_node);
1405                viodev->type = dev_name(&viodev->dev);
1406                viodev->irq = 0;
1407        }
1408
1409        viodev->name = of_node->name;
1410        viodev->dev.of_node = of_node_get(of_node);
1411
1412        set_dev_node(&viodev->dev, of_node_to_nid(of_node));
1413
1414        /* init generic 'struct device' fields: */
1415        viodev->dev.parent = &vio_bus_device.dev;
1416        viodev->dev.bus = &vio_bus_type;
1417        viodev->dev.release = vio_dev_release;
1418
1419        if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) {
1420                if (firmware_has_feature(FW_FEATURE_CMO))
1421                        vio_cmo_set_dma_ops(viodev);
1422                else
1423                        set_dma_ops(&viodev->dev, &dma_iommu_ops);
1424
1425                set_iommu_table_base(&viodev->dev,
1426                                     vio_build_iommu_table(viodev));
1427
1428                /* needed to ensure proper operation of coherent allocations
1429                 * later, in case the driver doesn't set it explicitly */
1430                viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
1431                viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
1432        }
1433
1434        /* register with generic device framework */
1435        if (device_register(&viodev->dev)) {
1436                printk(KERN_ERR "%s: failed to register device %s\n",
1437                                __func__, dev_name(&viodev->dev));
1438                put_device(&viodev->dev);
1439                return NULL;
1440        }
1441
1442        return viodev;
1443
1444out:    /* Use this exit point for any return prior to device_register */
1445        kfree(viodev);
1446
1447        return NULL;
1448}
1449EXPORT_SYMBOL(vio_register_device_node);
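/*
 * Illustrative sketch only, not used by this file: code that discovers a
 * new child of /vdevice at runtime (for example a hypothetical hot-add
 * path that has just attached device_node "dn") would register it like
 * this and leave driver binding to the bus match code:
 *
 *        struct vio_dev *viodev;
 *
 *        viodev = vio_register_device_node(dn);
 *        if (!viodev)
 *                return -EIO;
 */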
1450
1451/*
1452 * vio_bus_scan_register_devices - Scan OF and register each child device
1453 * @root_name: OF node name for the root of the subtree to search.
1454 *             This must be non-NULL.
1455 *
1456 * Starting from the root node provided, register the device node for
1457 * each child beneath the root.
1458 */
1459static void vio_bus_scan_register_devices(char *root_name)
1460{
1461        struct device_node *node_root, *node_child;
1462
1463        if (!root_name)
1464                return;
1465
1466        node_root = of_find_node_by_name(NULL, root_name);
1467        if (node_root) {
1468
1469                /*
1470                 * Create struct vio_devices for each virtual device in
1471                 * the device tree. Drivers will associate with them later.
1472                 */
1473                node_child = of_get_next_child(node_root, NULL);
1474                while (node_child) {
1475                        vio_register_device_node(node_child);
1476                        node_child = of_get_next_child(node_root, node_child);
1477                }
1478                of_node_put(node_root);
1479        }
1480}
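/*
 * For reference, a minimal sketch of the device tree layout this scan
 * walks; the node name, unit address and property values below are
 * illustrative only:
 *
 *        vdevice {
 *                l-lan@30000002 {
 *                        device_type = "network";
 *                        compatible = "IBM,l-lan";
 *                        reg = <0x30000002>;
 *                        interrupts = <...>;
 *                        ibm,my-dma-window = <...>;
 *                };
 *        };
 */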
1481
1482/**
1483 * vio_bus_init - Initialize the virtual IO bus
1484 */
1485static int __init vio_bus_init(void)
1486{
1487        int err;
1488
1489        if (firmware_has_feature(FW_FEATURE_CMO))
1490                vio_cmo_sysfs_init();
1491
1492        err = bus_register(&vio_bus_type);
1493        if (err) {
1494                printk(KERN_ERR "failed to register VIO bus\n");
1495                return err;
1496        }
1497
1498        /*
1499         * The fake parent of all vio devices, just to give us
1500         * a nice directory
1501         */
1502        err = device_register(&vio_bus_device.dev);
1503        if (err) {
1504                printk(KERN_WARNING "%s: device_register returned %i\n",
1505                                __func__, err);
1506                return err;
1507        }
1508
1509        if (firmware_has_feature(FW_FEATURE_CMO))
1510                vio_cmo_bus_init();
1511
1512        return 0;
1513}
1514postcore_initcall(vio_bus_init);
1515
1516static int __init vio_device_init(void)
1517{
1518        vio_bus_scan_register_devices("vdevice");
1519        vio_bus_scan_register_devices("ibm,platform-facilities");
1520
1521        return 0;
1522}
1523device_initcall(vio_device_init);
1524
1525static ssize_t name_show(struct device *dev,
1526                struct device_attribute *attr, char *buf)
1527{
1528        return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
1529}
1530static DEVICE_ATTR_RO(name);
1531
1532static ssize_t devspec_show(struct device *dev,
1533                struct device_attribute *attr, char *buf)
1534{
1535        struct device_node *of_node = dev->of_node;
1536
1537        return sprintf(buf, "%pOF\n", of_node);
1538}
1539static DEVICE_ATTR_RO(devspec);
1540
1541static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
1542                             char *buf)
1543{
1544        const struct vio_dev *vio_dev = to_vio_dev(dev);
1545        struct device_node *dn;
1546        const char *cp;
1547
1548        dn = dev->of_node;
1549        if (!dn) {
1550                strcpy(buf, "\n");
1551                return strlen(buf);
1552        }
1553        cp = of_get_property(dn, "compatible", NULL);
1554        if (!cp) {
1555                strcpy(buf, "\n");
1556                return strlen(buf);
1557        }
1558
1559        return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
1560}
1561static DEVICE_ATTR_RO(modalias);
1562
1563static struct attribute *vio_dev_attrs[] = {
1564        &dev_attr_name.attr,
1565        &dev_attr_devspec.attr,
1566        &dev_attr_modalias.attr,
1567        NULL,
1568};
1569ATTRIBUTE_GROUPS(vio_dev);
1570
1571void vio_unregister_device(struct vio_dev *viodev)
1572{
1573        device_unregister(&viodev->dev);
1574        if (viodev->family == VDEVICE)
1575                irq_dispose_mapping(viodev->irq);
1576}
1577EXPORT_SYMBOL(vio_unregister_device);
1578
1579static int vio_bus_match(struct device *dev, struct device_driver *drv)
1580{
1581        const struct vio_dev *vio_dev = to_vio_dev(dev);
1582        struct vio_driver *vio_drv = to_vio_driver(drv);
1583        const struct vio_device_id *ids = vio_drv->id_table;
1584
1585        return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
1586}
1587
1588static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
1589{
1590        const struct vio_dev *vio_dev = to_vio_dev(dev);
1591        struct device_node *dn;
1592        const char *cp;
1593
1594        dn = dev->of_node;
1595        if (!dn)
1596                return -ENODEV;
1597        cp = of_get_property(dn, "compatible", NULL);
1598        if (!cp)
1599                return -ENODEV;
1600
1601        add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
1602        return 0;
1603}
1604
1605struct bus_type vio_bus_type = {
1606        .name = "vio",
1607        .dev_groups = vio_dev_groups,
1608        .uevent = vio_hotplug,
1609        .match = vio_bus_match,
1610        .probe = vio_bus_probe,
1611        .remove = vio_bus_remove,
1612};
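/*
 * Illustrative sketch only (no such driver lives in this file): a VIO
 * driver declares the devices it services in a vio_device_id table;
 * vio_bus_match() above compares each device's type and "compatible"
 * property against that table, and the same two device strings are what
 * the "vio:T%sS%s" modalias/uevent above encodes for module autoloading:
 *
 *        static const struct vio_device_id example_ids[] = {
 *                { "network", "IBM,l-lan" },
 *                { "", "" }
 *        };
 *        MODULE_DEVICE_TABLE(vio, example_ids);
 */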
1613
1614/**
1615 * vio_get_attribute - get attribute for virtual device
1616 * @vdev:       The vio device whose property is to be read.
1617 * @which:      The property/attribute to be extracted.
1618 * @length:     Pointer to length of returned data size (unused if NULL).
1619 *
1620 * Calls of_get_property() to return the value of the attribute
1621 * specified by @which.
1622 */
1623const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
1624{
1625        return of_get_property(vdev->dev.of_node, which, length);
1626}
1627EXPORT_SYMBOL(vio_get_attribute);
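/*
 * Illustrative sketch only: a driver would typically use this helper to
 * read one of its device tree properties; the property name and error
 * handling below are examples, not requirements:
 *
 *        const __be32 *dma_window;
 *        int len;
 *
 *        dma_window = vio_get_attribute(vdev, "ibm,my-dma-window", &len);
 *        if (!dma_window)
 *                return -ENODEV;
 */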
1628
1629#ifdef CONFIG_PPC_PSERIES
1630/* vio_find_name() - internal because only vio.c knows how we formatted the
1631 * kobject name
1632 */
1633static struct vio_dev *vio_find_name(const char *name)
1634{
1635        struct device *found;
1636
1637        found = bus_find_device_by_name(&vio_bus_type, NULL, name);
1638        if (!found)
1639                return NULL;
1640
1641        return to_vio_dev(found);
1642}
1643
1644/**
1645 * vio_find_node - find an already-registered vio_dev
1646 * @vnode: device_node of the virtual device we're looking for
1647 *
1648 * Takes a reference to the embedded struct device which needs to be dropped
1649 * after use.
1650 */
1651struct vio_dev *vio_find_node(struct device_node *vnode)
1652{
1653        char kobj_name[20];
1654        struct device_node *vnode_parent;
1655
1656        vnode_parent = of_get_parent(vnode);
1657        if (!vnode_parent)
1658                return NULL;
1659
1660        /* construct the kobject name from the device node */
1661        if (of_node_is_type(vnode_parent, "vdevice")) {
1662                const __be32 *prop;
1663
1664                prop = of_get_property(vnode, "reg", NULL);
1665                if (!prop)
1666                        goto out;
1667                snprintf(kobj_name, sizeof(kobj_name), "%x",
1668                         (uint32_t)of_read_number(prop, 1));
1669        } else if (of_node_is_type(vnode_parent, "ibm,platform-facilities"))
1670                snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
1671        else
1672                goto out;
1673
1674        of_node_put(vnode_parent);
1675        return vio_find_name(kobj_name);
1676out:
1677        of_node_put(vnode_parent);
1678        return NULL;
1679}
1680EXPORT_SYMBOL(vio_find_node);
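/*
 * Illustrative sketch only: a caller holding a device_node "dn" (for
 * example a hypothetical hot-remove path) can look up the registered
 * vio_dev, and must drop the reference vio_find_node() took on the
 * embedded struct device once it is done with it:
 *
 *        struct vio_dev *viodev = vio_find_node(dn);
 *
 *        if (!viodev)
 *                return -ENODEV;
 *        vio_unregister_device(viodev);
 *        put_device(&viodev->dev);
 */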
1681
1682int vio_enable_interrupts(struct vio_dev *dev)
1683{
1684        int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
1685        if (rc != H_SUCCESS)
1686                printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
1687        return rc;
1688}
1689EXPORT_SYMBOL(vio_enable_interrupts);
1690
1691int vio_disable_interrupts(struct vio_dev *dev)
1692{
1693        int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
1694        if (rc != H_SUCCESS)
1695                printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
1696        return rc;
1697}
1698EXPORT_SYMBOL(vio_disable_interrupts);
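/*
 * Illustrative sketch only: drivers commonly bracket event processing
 * with these calls, masking the virtual interrupt while draining their
 * queue and unmasking it afterwards; the queue helpers named below are
 * hypothetical, and real drivers usually re-check the queue after
 * re-enabling to close the race with a late event:
 *
 *        vio_disable_interrupts(vdev);
 *        while (example_queue_has_work(vdev))
 *                example_process_one(vdev);
 *        vio_enable_interrupts(vdev);
 */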
1699#endif /* CONFIG_PPC_PSERIES */
1700
1701static int __init vio_init(void)
1702{
1703        dma_debug_add_bus(&vio_bus_type);
1704        return 0;
1705}
1706fs_initcall(vio_init);
1707