linux/drivers/misc/mic/scif/scif_rma.c
<<
>>
Prefs
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2015 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * Intel SCIF driver.
  16 *
  17 */
  18#include <linux/dma_remapping.h>
  19#include <linux/pagemap.h>
  20#include "scif_main.h"
  21#include "scif_map.h"
  22
  23/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
  24#define SCIF_MAP_ULIMIT 0x40
  25
  26bool scif_ulimit_check = 1;
  27
  28/**
  29 * scif_rma_ep_init:
  30 * @ep: end point
  31 *
  32 * Initialize RMA per EP data structures.
  33 */
  34void scif_rma_ep_init(struct scif_endpt *ep)
  35{
  36        struct scif_endpt_rma_info *rma = &ep->rma_info;
  37
  38        mutex_init(&rma->rma_lock);
  39        init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
  40                         SCIF_DMA_64BIT_PFN);
  41        spin_lock_init(&rma->tc_lock);
  42        mutex_init(&rma->mmn_lock);
  43        INIT_LIST_HEAD(&rma->reg_list);
  44        INIT_LIST_HEAD(&rma->remote_reg_list);
  45        atomic_set(&rma->tw_refcount, 0);
  46        atomic_set(&rma->tcw_refcount, 0);
  47        atomic_set(&rma->tcw_total_pages, 0);
  48        atomic_set(&rma->fence_refcount, 0);
  49
  50        rma->async_list_del = 0;
  51        rma->dma_chan = NULL;
  52        INIT_LIST_HEAD(&rma->mmn_list);
  53        INIT_LIST_HEAD(&rma->vma_list);
  54        init_waitqueue_head(&rma->markwq);
  55}
  56
  57/**
  58 * scif_rma_ep_can_uninit:
  59 * @ep: end point
  60 *
  61 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
  62 */
  63int scif_rma_ep_can_uninit(struct scif_endpt *ep)
  64{
  65        int ret = 0;
  66
  67        mutex_lock(&ep->rma_info.rma_lock);
  68        /* Destroy RMA Info only if both lists are empty */
  69        if (list_empty(&ep->rma_info.reg_list) &&
  70            list_empty(&ep->rma_info.remote_reg_list) &&
  71            list_empty(&ep->rma_info.mmn_list) &&
  72            !atomic_read(&ep->rma_info.tw_refcount) &&
  73            !atomic_read(&ep->rma_info.tcw_refcount) &&
  74            !atomic_read(&ep->rma_info.fence_refcount))
  75                ret = 1;
  76        mutex_unlock(&ep->rma_info.rma_lock);
  77        return ret;
  78}
  79
  80/**
  81 * scif_create_pinned_pages:
  82 * @nr_pages: number of pages in window
  83 * @prot: read/write protection
  84 *
  85 * Allocate and prepare a set of pinned pages.
  86 */
  87static struct scif_pinned_pages *
  88scif_create_pinned_pages(int nr_pages, int prot)
  89{
  90        struct scif_pinned_pages *pin;
  91
  92        might_sleep();
  93        pin = scif_zalloc(sizeof(*pin));
  94        if (!pin)
  95                goto error;
  96
  97        pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
  98        if (!pin->pages)
  99                goto error_free_pinned_pages;
 100
 101        pin->prot = prot;
 102        pin->magic = SCIFEP_MAGIC;
 103        return pin;
 104
 105error_free_pinned_pages:
 106        scif_free(pin, sizeof(*pin));
 107error:
 108        return NULL;
 109}
 110
 111/**
 112 * scif_destroy_pinned_pages:
 113 * @pin: A set of pinned pages.
 114 *
 115 * Deallocate resources for pinned pages.
 116 */
 117static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
 118{
 119        int j;
 120        int writeable = pin->prot & SCIF_PROT_WRITE;
 121        int kernel = SCIF_MAP_KERNEL & pin->map_flags;
 122
 123        for (j = 0; j < pin->nr_pages; j++) {
 124                if (pin->pages[j] && !kernel) {
 125                        if (writeable)
 126                                SetPageDirty(pin->pages[j]);
 127                        put_page(pin->pages[j]);
 128                }
 129        }
 130
 131        scif_free(pin->pages,
 132                  pin->nr_pages * sizeof(*pin->pages));
 133        scif_free(pin, sizeof(*pin));
 134        return 0;
 135}
 136
 137/*
 138 * scif_create_window:
 139 * @ep: end point
 140 * @nr_pages: number of pages
 141 * @offset: registration offset
 142 * @temp: true if a temporary window is being created
 143 *
 144 * Allocate and prepare a self registration window.
 145 */
 146struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
 147                                       s64 offset, bool temp)
 148{
 149        struct scif_window *window;
 150
 151        might_sleep();
 152        window = scif_zalloc(sizeof(*window));
 153        if (!window)
 154                goto error;
 155
 156        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
 157        if (!window->dma_addr)
 158                goto error_free_window;
 159
 160        window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
 161        if (!window->num_pages)
 162                goto error_free_window;
 163
 164        window->offset = offset;
 165        window->ep = (u64)ep;
 166        window->magic = SCIFEP_MAGIC;
 167        window->reg_state = OP_IDLE;
 168        init_waitqueue_head(&window->regwq);
 169        window->unreg_state = OP_IDLE;
 170        init_waitqueue_head(&window->unregwq);
 171        INIT_LIST_HEAD(&window->list);
 172        window->type = SCIF_WINDOW_SELF;
 173        window->temp = temp;
 174        return window;
 175
 176error_free_window:
 177        scif_free(window->dma_addr,
 178                  nr_pages * sizeof(*window->dma_addr));
 179        scif_free(window, sizeof(*window));
 180error:
 181        return NULL;
 182}
 183
 184/**
 185 * scif_destroy_incomplete_window:
 186 * @ep: end point
 187 * @window: registration window
 188 *
 189 * Deallocate resources for self window.
 190 */
 191static void scif_destroy_incomplete_window(struct scif_endpt *ep,
 192                                           struct scif_window *window)
 193{
 194        int err;
 195        int nr_pages = window->nr_pages;
 196        struct scif_allocmsg *alloc = &window->alloc_handle;
 197        struct scifmsg msg;
 198
 199retry:
 200        /* Wait for a SCIF_ALLOC_GNT/REJ message */
 201        err = wait_event_timeout(alloc->allocwq,
 202                                 alloc->state != OP_IN_PROGRESS,
 203                                 SCIF_NODE_ALIVE_TIMEOUT);
 204        if (!err && scifdev_alive(ep))
 205                goto retry;
 206
 207        mutex_lock(&ep->rma_info.rma_lock);
 208        if (alloc->state == OP_COMPLETED) {
 209                msg.uop = SCIF_FREE_VIRT;
 210                msg.src = ep->port;
 211                msg.payload[0] = ep->remote_ep;
 212                msg.payload[1] = window->alloc_handle.vaddr;
 213                msg.payload[2] = (u64)window;
 214                msg.payload[3] = SCIF_REGISTER;
 215                _scif_nodeqp_send(ep->remote_dev, &msg);
 216        }
 217        mutex_unlock(&ep->rma_info.rma_lock);
 218
 219        scif_free_window_offset(ep, window, window->offset);
 220        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
 221        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
 222        scif_free(window, sizeof(*window));
 223}
 224
 225/**
 226 * scif_unmap_window:
 227 * @remote_dev: SCIF remote device
 228 * @window: registration window
 229 *
 230 * Delete any DMA mappings created for a registered self window
 231 */
 232void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
 233{
 234        int j;
 235
 236        if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
 237                if (window->st) {
 238                        dma_unmap_sg(&remote_dev->sdev->dev,
 239                                     window->st->sgl, window->st->nents,
 240                                     DMA_BIDIRECTIONAL);
 241                        sg_free_table(window->st);
 242                        kfree(window->st);
 243                        window->st = NULL;
 244                }
 245        } else {
 246                for (j = 0; j < window->nr_contig_chunks; j++) {
 247                        if (window->dma_addr[j]) {
 248                                scif_unmap_single(window->dma_addr[j],
 249                                                  remote_dev,
 250                                                  window->num_pages[j] <<
 251                                                  PAGE_SHIFT);
 252                                window->dma_addr[j] = 0x0;
 253                        }
 254                }
 255        }
 256}
 257
 258static inline struct mm_struct *__scif_acquire_mm(void)
 259{
 260        if (scif_ulimit_check)
 261                return get_task_mm(current);
 262        return NULL;
 263}
 264
 265static inline void __scif_release_mm(struct mm_struct *mm)
 266{
 267        if (mm)
 268                mmput(mm);
 269}
 270
 271static inline int
 272__scif_dec_pinned_vm_lock(struct mm_struct *mm,
 273                          int nr_pages, bool try_lock)
 274{
 275        if (!mm || !nr_pages || !scif_ulimit_check)
 276                return 0;
 277        if (try_lock) {
 278                if (!down_write_trylock(&mm->mmap_sem)) {
 279                        dev_err(scif_info.mdev.this_device,
 280                                "%s %d err\n", __func__, __LINE__);
 281                        return -1;
 282                }
 283        } else {
 284                down_write(&mm->mmap_sem);
 285        }
 286        mm->pinned_vm -= nr_pages;
 287        up_write(&mm->mmap_sem);
 288        return 0;
 289}
 290
 291static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 292                                             int nr_pages)
 293{
 294        unsigned long locked, lock_limit;
 295
 296        if (!mm || !nr_pages || !scif_ulimit_check)
 297                return 0;
 298
 299        locked = nr_pages;
 300        locked += mm->pinned_vm;
 301        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 302        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 303                dev_err(scif_info.mdev.this_device,
 304                        "locked(%lu) > lock_limit(%lu)\n",
 305                        locked, lock_limit);
 306                return -ENOMEM;
 307        }
 308        mm->pinned_vm = locked;
 309        return 0;
 310}
 311
 312/**
 313 * scif_destroy_window:
 314 * @ep: end point
 315 * @window: registration window
 316 *
 317 * Deallocate resources for self window.
 318 */
 319int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
 320{
 321        int j;
 322        struct scif_pinned_pages *pinned_pages = window->pinned_pages;
 323        int nr_pages = window->nr_pages;
 324
 325        might_sleep();
 326        if (!window->temp && window->mm) {
 327                __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
 328                __scif_release_mm(window->mm);
 329                window->mm = NULL;
 330        }
 331
 332        scif_free_window_offset(ep, window, window->offset);
 333        scif_unmap_window(ep->remote_dev, window);
 334        /*
 335         * Decrement references for this set of pinned pages from
 336         * this window.
 337         */
 338        j = atomic_sub_return(1, &pinned_pages->ref_count);
 339        if (j < 0)
 340                dev_err(scif_info.mdev.this_device,
 341                        "%s %d incorrect ref count %d\n",
 342                        __func__, __LINE__, j);
 343        /*
 344         * If the ref count for pinned_pages is zero then someone
 345         * has already called scif_unpin_pages() for it and we should
 346         * destroy the page cache.
 347         */
 348        if (!j)
 349                scif_destroy_pinned_pages(window->pinned_pages);
 350        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
 351        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
 352        window->magic = 0;
 353        scif_free(window, sizeof(*window));
 354        return 0;
 355}
 356
 357/**
 358 * scif_create_remote_lookup:
 359 * @remote_dev: SCIF remote device
 360 * @window: remote window
 361 *
 362 * Allocate and prepare lookup entries for the remote
 363 * end to copy over the physical addresses.
 364 * Returns 0 on success and appropriate errno on failure.
 365 */
 366static int scif_create_remote_lookup(struct scif_dev *remote_dev,
 367                                     struct scif_window *window)
 368{
 369        int i, j, err = 0;
 370        int nr_pages = window->nr_pages;
 371        bool vmalloc_dma_phys, vmalloc_num_pages;
 372
 373        might_sleep();
 374        /* Map window */
 375        err = scif_map_single(&window->mapped_offset,
 376                              window, remote_dev, sizeof(*window));
 377        if (err)
 378                goto error_window;
 379
 380        /* Compute the number of lookup entries. 21 == 2MB Shift */
 381        window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
 382                                        ((2) * 1024 * 1024)) >> 21;
 383
 384        window->dma_addr_lookup.lookup =
 385                scif_alloc_coherent(&window->dma_addr_lookup.offset,
 386                                    remote_dev, window->nr_lookup *
 387                                    sizeof(*window->dma_addr_lookup.lookup),
 388                                    GFP_KERNEL | __GFP_ZERO);
 389        if (!window->dma_addr_lookup.lookup) {
 390                err = -ENOMEM;
 391                goto error_window;
 392        }
 393
 394        window->num_pages_lookup.lookup =
 395                scif_alloc_coherent(&window->num_pages_lookup.offset,
 396                                    remote_dev, window->nr_lookup *
 397                                    sizeof(*window->num_pages_lookup.lookup),
 398                                    GFP_KERNEL | __GFP_ZERO);
 399        if (!window->num_pages_lookup.lookup) {
 400                err = -ENOMEM;
 401                goto error_window;
 402        }
 403
 404        vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
 405        vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
 406
 407        /* Now map each of the pages containing physical addresses */
 408        for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
 409                err = scif_map_page(&window->dma_addr_lookup.lookup[j],
 410                                    vmalloc_dma_phys ?
 411                                    vmalloc_to_page(&window->dma_addr[i]) :
 412                                    virt_to_page(&window->dma_addr[i]),
 413                                    remote_dev);
 414                if (err)
 415                        goto error_window;
 416                err = scif_map_page(&window->num_pages_lookup.lookup[j],
 417                                    vmalloc_dma_phys ?
 418                                    vmalloc_to_page(&window->num_pages[i]) :
 419                                    virt_to_page(&window->num_pages[i]),
 420                                    remote_dev);
 421                if (err)
 422                        goto error_window;
 423        }
 424        return 0;
 425error_window:
 426        return err;
 427}
 428
 429/**
 430 * scif_destroy_remote_lookup:
 431 * @remote_dev: SCIF remote device
 432 * @window: remote window
 433 *
 434 * Destroy lookup entries used for the remote
 435 * end to copy over the physical addresses.
 436 */
 437static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
 438                                       struct scif_window *window)
 439{
 440        int i, j;
 441
 442        if (window->nr_lookup) {
 443                struct scif_rma_lookup *lup = &window->dma_addr_lookup;
 444                struct scif_rma_lookup *npup = &window->num_pages_lookup;
 445
 446                for (i = 0, j = 0; i < window->nr_pages;
 447                        i += SCIF_NR_ADDR_IN_PAGE, j++) {
 448                        if (lup->lookup && lup->lookup[j])
 449                                scif_unmap_single(lup->lookup[j],
 450                                                  remote_dev,
 451                                                  PAGE_SIZE);
 452                        if (npup->lookup && npup->lookup[j])
 453                                scif_unmap_single(npup->lookup[j],
 454                                                  remote_dev,
 455                                                  PAGE_SIZE);
 456                }
 457                if (lup->lookup)
 458                        scif_free_coherent(lup->lookup, lup->offset,
 459                                           remote_dev, window->nr_lookup *
 460                                           sizeof(*lup->lookup));
 461                if (npup->lookup)
 462                        scif_free_coherent(npup->lookup, npup->offset,
 463                                           remote_dev, window->nr_lookup *
 464                                           sizeof(*npup->lookup));
 465                if (window->mapped_offset)
 466                        scif_unmap_single(window->mapped_offset,
 467                                          remote_dev, sizeof(*window));
 468                window->nr_lookup = 0;
 469        }
 470}
 471
 472/**
 473 * scif_create_remote_window:
 474 * @ep: end point
 475 * @nr_pages: number of pages in window
 476 *
 477 * Allocate and prepare a remote registration window.
 478 */
 479static struct scif_window *
 480scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
 481{
 482        struct scif_window *window;
 483
 484        might_sleep();
 485        window = scif_zalloc(sizeof(*window));
 486        if (!window)
 487                goto error_ret;
 488
 489        window->magic = SCIFEP_MAGIC;
 490        window->nr_pages = nr_pages;
 491
 492        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
 493        if (!window->dma_addr)
 494                goto error_window;
 495
 496        window->num_pages = scif_zalloc(nr_pages *
 497                                        sizeof(*window->num_pages));
 498        if (!window->num_pages)
 499                goto error_window;
 500
 501        if (scif_create_remote_lookup(scifdev, window))
 502                goto error_window;
 503
 504        window->type = SCIF_WINDOW_PEER;
 505        window->unreg_state = OP_IDLE;
 506        INIT_LIST_HEAD(&window->list);
 507        return window;
 508error_window:
 509        scif_destroy_remote_window(window);
 510error_ret:
 511        return NULL;
 512}
 513
 514/**
 515 * scif_destroy_remote_window:
 516 * @ep: end point
 517 * @window: remote registration window
 518 *
 519 * Deallocate resources for remote window.
 520 */
 521void
 522scif_destroy_remote_window(struct scif_window *window)
 523{
 524        scif_free(window->dma_addr, window->nr_pages *
 525                  sizeof(*window->dma_addr));
 526        scif_free(window->num_pages, window->nr_pages *
 527                  sizeof(*window->num_pages));
 528        window->magic = 0;
 529        scif_free(window, sizeof(*window));
 530}
 531
 532/**
 533 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 534 * @remote_dev: SCIF remote device
 535 * @window: remote registration window
 536 *
 537 * Map the physical pages using dma_map_sg(..) and then detect the number
 538 * of contiguous DMA mappings allocated
 539 */
 540static int scif_iommu_map(struct scif_dev *remote_dev,
 541                          struct scif_window *window)
 542{
 543        struct scatterlist *sg;
 544        int i, err;
 545        scif_pinned_pages_t pin = window->pinned_pages;
 546
 547        window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
 548        if (!window->st)
 549                return -ENOMEM;
 550
 551        err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
 552        if (err)
 553                return err;
 554
 555        for_each_sg(window->st->sgl, sg, window->st->nents, i)
 556                sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
 557
 558        err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
 559                         window->st->nents, DMA_BIDIRECTIONAL);
 560        if (!err)
 561                return -ENOMEM;
 562        /* Detect contiguous ranges of DMA mappings */
 563        sg = window->st->sgl;
 564        for (i = 0; sg; i++) {
 565                dma_addr_t last_da;
 566
 567                window->dma_addr[i] = sg_dma_address(sg);
 568                window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
 569                last_da = sg_dma_address(sg) + sg_dma_len(sg);
 570                while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
 571                        window->num_pages[i] +=
 572                                (sg_dma_len(sg) >> PAGE_SHIFT);
 573                        last_da = window->dma_addr[i] +
 574                                sg_dma_len(sg);
 575                }
 576                window->nr_contig_chunks++;
 577        }
 578        return 0;
 579}
 580
 581/**
 582 * scif_map_window:
 583 * @remote_dev: SCIF remote device
 584 * @window: self registration window
 585 *
 586 * Map pages of a window into the aperture/PCI.
 587 * Also determine addresses required for DMA.
 588 */
 589int
 590scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
 591{
 592        int i, j, k, err = 0, nr_contig_pages;
 593        scif_pinned_pages_t pin;
 594        phys_addr_t phys_prev, phys_curr;
 595
 596        might_sleep();
 597
 598        pin = window->pinned_pages;
 599
 600        if (intel_iommu_enabled && !scifdev_self(remote_dev))
 601                return scif_iommu_map(remote_dev, window);
 602
 603        for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
 604                phys_prev = page_to_phys(pin->pages[i]);
 605                nr_contig_pages = 1;
 606
 607                /* Detect physically contiguous chunks */
 608                for (k = i + 1; k < window->nr_pages; k++) {
 609                        phys_curr = page_to_phys(pin->pages[k]);
 610                        if (phys_curr != (phys_prev + PAGE_SIZE))
 611                                break;
 612                        phys_prev = phys_curr;
 613                        nr_contig_pages++;
 614                }
 615                window->num_pages[j] = nr_contig_pages;
 616                window->nr_contig_chunks++;
 617                if (scif_is_mgmt_node()) {
 618                        /*
 619                         * Management node has to deal with SMPT on X100 and
 620                         * hence the DMA mapping is required
 621                         */
 622                        err = scif_map_single(&window->dma_addr[j],
 623                                              phys_to_virt(page_to_phys(
 624                                                           pin->pages[i])),
 625                                              remote_dev,
 626                                              nr_contig_pages << PAGE_SHIFT);
 627                        if (err)
 628                                return err;
 629                } else {
 630                        window->dma_addr[j] = page_to_phys(pin->pages[i]);
 631                }
 632        }
 633        return err;
 634}
 635
 636/**
 637 * scif_send_scif_unregister:
 638 * @ep: end point
 639 * @window: self registration window
 640 *
 641 * Send a SCIF_UNREGISTER message.
 642 */
 643static int scif_send_scif_unregister(struct scif_endpt *ep,
 644                                     struct scif_window *window)
 645{
 646        struct scifmsg msg;
 647
 648        msg.uop = SCIF_UNREGISTER;
 649        msg.src = ep->port;
 650        msg.payload[0] = window->alloc_handle.vaddr;
 651        msg.payload[1] = (u64)window;
 652        return scif_nodeqp_send(ep->remote_dev, &msg);
 653}
 654
 655/**
 656 * scif_unregister_window:
 657 * @window: self registration window
 658 *
 659 * Send an unregistration request and wait for a response.
 660 */
 661int scif_unregister_window(struct scif_window *window)
 662{
 663        int err = 0;
 664        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
 665        bool send_msg = false;
 666
 667        might_sleep();
 668        switch (window->unreg_state) {
 669        case OP_IDLE:
 670        {
 671                window->unreg_state = OP_IN_PROGRESS;
 672                send_msg = true;
 673                /* fall through */
 674        }
 675        case OP_IN_PROGRESS:
 676        {
 677                scif_get_window(window, 1);
 678                mutex_unlock(&ep->rma_info.rma_lock);
 679                if (send_msg) {
 680                        err = scif_send_scif_unregister(ep, window);
 681                        if (err) {
 682                                window->unreg_state = OP_COMPLETED;
 683                                goto done;
 684                        }
 685                } else {
 686                        /* Return ENXIO since unregistration is in progress */
 687                        mutex_lock(&ep->rma_info.rma_lock);
 688                        return -ENXIO;
 689                }
 690retry:
 691                /* Wait for a SCIF_UNREGISTER_(N)ACK message */
 692                err = wait_event_timeout(window->unregwq,
 693                                         window->unreg_state != OP_IN_PROGRESS,
 694                                         SCIF_NODE_ALIVE_TIMEOUT);
 695                if (!err && scifdev_alive(ep))
 696                        goto retry;
 697                if (!err) {
 698                        err = -ENODEV;
 699                        window->unreg_state = OP_COMPLETED;
 700                        dev_err(scif_info.mdev.this_device,
 701                                "%s %d err %d\n", __func__, __LINE__, err);
 702                }
 703                if (err > 0)
 704                        err = 0;
 705done:
 706                mutex_lock(&ep->rma_info.rma_lock);
 707                scif_put_window(window, 1);
 708                break;
 709        }
 710        case OP_FAILED:
 711        {
 712                if (!scifdev_alive(ep)) {
 713                        err = -ENODEV;
 714                        window->unreg_state = OP_COMPLETED;
 715                }
 716                break;
 717        }
 718        case OP_COMPLETED:
 719                break;
 720        default:
 721                err = -ENODEV;
 722        }
 723
 724        if (window->unreg_state == OP_COMPLETED && window->ref_count)
 725                scif_put_window(window, window->nr_pages);
 726
 727        if (!window->ref_count) {
 728                atomic_inc(&ep->rma_info.tw_refcount);
 729                list_del_init(&window->list);
 730                scif_free_window_offset(ep, window, window->offset);
 731                mutex_unlock(&ep->rma_info.rma_lock);
 732                if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
 733                    scifdev_alive(ep)) {
 734                        scif_drain_dma_intr(ep->remote_dev->sdev,
 735                                            ep->rma_info.dma_chan);
 736                } else {
 737                        if (!__scif_dec_pinned_vm_lock(window->mm,
 738                                                       window->nr_pages, 1)) {
 739                                __scif_release_mm(window->mm);
 740                                window->mm = NULL;
 741                        }
 742                }
 743                scif_queue_for_cleanup(window, &scif_info.rma);
 744                mutex_lock(&ep->rma_info.rma_lock);
 745        }
 746        return err;
 747}
 748
 749/**
 750 * scif_send_alloc_request:
 751 * @ep: end point
 752 * @window: self registration window
 753 *
 754 * Send a remote window allocation request
 755 */
 756static int scif_send_alloc_request(struct scif_endpt *ep,
 757                                   struct scif_window *window)
 758{
 759        struct scifmsg msg;
 760        struct scif_allocmsg *alloc = &window->alloc_handle;
 761
 762        /* Set up the Alloc Handle */
 763        alloc->state = OP_IN_PROGRESS;
 764        init_waitqueue_head(&alloc->allocwq);
 765
 766        /* Send out an allocation request */
 767        msg.uop = SCIF_ALLOC_REQ;
 768        msg.payload[1] = window->nr_pages;
 769        msg.payload[2] = (u64)&window->alloc_handle;
 770        return _scif_nodeqp_send(ep->remote_dev, &msg);
 771}
 772
 773/**
 774 * scif_prep_remote_window:
 775 * @ep: end point
 776 * @window: self registration window
 777 *
 778 * Send a remote window allocation request, wait for an allocation response,
 779 * and prepares the remote window by copying over the page lists
 780 */
 781static int scif_prep_remote_window(struct scif_endpt *ep,
 782                                   struct scif_window *window)
 783{
 784        struct scifmsg msg;
 785        struct scif_window *remote_window;
 786        struct scif_allocmsg *alloc = &window->alloc_handle;
 787        dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
 788        int i = 0, j = 0;
 789        int nr_contig_chunks, loop_nr_contig_chunks;
 790        int remaining_nr_contig_chunks, nr_lookup;
 791        int err, map_err;
 792
 793        map_err = scif_map_window(ep->remote_dev, window);
 794        if (map_err)
 795                dev_err(&ep->remote_dev->sdev->dev,
 796                        "%s %d map_err %d\n", __func__, __LINE__, map_err);
 797        remaining_nr_contig_chunks = window->nr_contig_chunks;
 798        nr_contig_chunks = window->nr_contig_chunks;
 799retry:
 800        /* Wait for a SCIF_ALLOC_GNT/REJ message */
 801        err = wait_event_timeout(alloc->allocwq,
 802                                 alloc->state != OP_IN_PROGRESS,
 803                                 SCIF_NODE_ALIVE_TIMEOUT);
 804        mutex_lock(&ep->rma_info.rma_lock);
 805        /* Synchronize with the thread waking up allocwq */
 806        mutex_unlock(&ep->rma_info.rma_lock);
 807        if (!err && scifdev_alive(ep))
 808                goto retry;
 809
 810        if (!err)
 811                err = -ENODEV;
 812
 813        if (err > 0)
 814                err = 0;
 815        else
 816                return err;
 817
 818        /* Bail out. The remote end rejected this request */
 819        if (alloc->state == OP_FAILED)
 820                return -ENOMEM;
 821
 822        if (map_err) {
 823                dev_err(&ep->remote_dev->sdev->dev,
 824                        "%s %d err %d\n", __func__, __LINE__, map_err);
 825                msg.uop = SCIF_FREE_VIRT;
 826                msg.src = ep->port;
 827                msg.payload[0] = ep->remote_ep;
 828                msg.payload[1] = window->alloc_handle.vaddr;
 829                msg.payload[2] = (u64)window;
 830                msg.payload[3] = SCIF_REGISTER;
 831                spin_lock(&ep->lock);
 832                if (ep->state == SCIFEP_CONNECTED)
 833                        err = _scif_nodeqp_send(ep->remote_dev, &msg);
 834                else
 835                        err = -ENOTCONN;
 836                spin_unlock(&ep->lock);
 837                return err;
 838        }
 839
 840        remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
 841                                     ep->remote_dev);
 842
 843        /* Compute the number of lookup entries. 21 == 2MB Shift */
 844        nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
 845                          >> ilog2(SCIF_NR_ADDR_IN_PAGE);
 846
 847        dma_phys_lookup =
 848                scif_ioremap(remote_window->dma_addr_lookup.offset,
 849                             nr_lookup *
 850                             sizeof(*remote_window->dma_addr_lookup.lookup),
 851                             ep->remote_dev);
 852        num_pages_lookup =
 853                scif_ioremap(remote_window->num_pages_lookup.offset,
 854                             nr_lookup *
 855                             sizeof(*remote_window->num_pages_lookup.lookup),
 856                             ep->remote_dev);
 857
 858        while (remaining_nr_contig_chunks) {
 859                loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
 860                                              (int)SCIF_NR_ADDR_IN_PAGE);
 861                /* #1/2 - Copy  physical addresses over to the remote side */
 862
 863                /* #2/2 - Copy DMA addresses (addresses that are fed into the
 864                 * DMA engine) We transfer bus addresses which are then
 865                 * converted into a MIC physical address on the remote
 866                 * side if it is a MIC, if the remote node is a mgmt node we
 867                 * transfer the MIC physical address
 868                 */
 869                tmp = scif_ioremap(dma_phys_lookup[j],
 870                                   loop_nr_contig_chunks *
 871                                   sizeof(*window->dma_addr),
 872                                   ep->remote_dev);
 873                tmp1 = scif_ioremap(num_pages_lookup[j],
 874                                    loop_nr_contig_chunks *
 875                                    sizeof(*window->num_pages),
 876                                    ep->remote_dev);
 877                if (scif_is_mgmt_node()) {
 878                        memcpy_toio((void __force __iomem *)tmp,
 879                                    &window->dma_addr[i], loop_nr_contig_chunks
 880                                    * sizeof(*window->dma_addr));
 881                        memcpy_toio((void __force __iomem *)tmp1,
 882                                    &window->num_pages[i], loop_nr_contig_chunks
 883                                    * sizeof(*window->num_pages));
 884                } else {
 885                        if (scifdev_is_p2p(ep->remote_dev)) {
 886                                /*
 887                                 * add remote node's base address for this node
 888                                 * to convert it into a MIC address
 889                                 */
 890                                int m;
 891                                dma_addr_t dma_addr;
 892
 893                                for (m = 0; m < loop_nr_contig_chunks; m++) {
 894                                        dma_addr = window->dma_addr[i + m] +
 895                                                ep->remote_dev->base_addr;
 896                                        writeq(dma_addr,
 897                                               (void __force __iomem *)&tmp[m]);
 898                                }
 899                                memcpy_toio((void __force __iomem *)tmp1,
 900                                            &window->num_pages[i],
 901                                            loop_nr_contig_chunks
 902                                            * sizeof(*window->num_pages));
 903                        } else {
 904                                /* Mgmt node or loopback - transfer DMA
 905                                 * addresses as is, this is the same as a
 906                                 * MIC physical address (we use the dma_addr
 907                                 * and not the phys_addr array since the
 908                                 * phys_addr is only setup if there is a mmap()
 909                                 * request from the mgmt node)
 910                                 */
 911                                memcpy_toio((void __force __iomem *)tmp,
 912                                            &window->dma_addr[i],
 913                                            loop_nr_contig_chunks *
 914                                            sizeof(*window->dma_addr));
 915                                memcpy_toio((void __force __iomem *)tmp1,
 916                                            &window->num_pages[i],
 917                                            loop_nr_contig_chunks *
 918                                            sizeof(*window->num_pages));
 919                        }
 920                }
 921                remaining_nr_contig_chunks -= loop_nr_contig_chunks;
 922                i += loop_nr_contig_chunks;
 923                j++;
 924                scif_iounmap(tmp, loop_nr_contig_chunks *
 925                             sizeof(*window->dma_addr), ep->remote_dev);
 926                scif_iounmap(tmp1, loop_nr_contig_chunks *
 927                             sizeof(*window->num_pages), ep->remote_dev);
 928        }
 929
 930        /* Prepare the remote window for the peer */
 931        remote_window->peer_window = (u64)window;
 932        remote_window->offset = window->offset;
 933        remote_window->prot = window->prot;
 934        remote_window->nr_contig_chunks = nr_contig_chunks;
 935        remote_window->ep = ep->remote_ep;
 936        scif_iounmap(num_pages_lookup,
 937                     nr_lookup *
 938                     sizeof(*remote_window->num_pages_lookup.lookup),
 939                     ep->remote_dev);
 940        scif_iounmap(dma_phys_lookup,
 941                     nr_lookup *
 942                     sizeof(*remote_window->dma_addr_lookup.lookup),
 943                     ep->remote_dev);
 944        scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
 945        window->peer_window = alloc->vaddr;
 946        return err;
 947}
 948
 949/**
 950 * scif_send_scif_register:
 951 * @ep: end point
 952 * @window: self registration window
 953 *
 954 * Send a SCIF_REGISTER message if EP is connected and wait for a
 955 * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
 956 * message so that the peer can free its remote window allocated earlier.
 957 */
 958static int scif_send_scif_register(struct scif_endpt *ep,
 959                                   struct scif_window *window)
 960{
 961        int err = 0;
 962        struct scifmsg msg;
 963
 964        msg.src = ep->port;
 965        msg.payload[0] = ep->remote_ep;
 966        msg.payload[1] = window->alloc_handle.vaddr;
 967        msg.payload[2] = (u64)window;
 968        spin_lock(&ep->lock);
 969        if (ep->state == SCIFEP_CONNECTED) {
 970                msg.uop = SCIF_REGISTER;
 971                window->reg_state = OP_IN_PROGRESS;
 972                err = _scif_nodeqp_send(ep->remote_dev, &msg);
 973                spin_unlock(&ep->lock);
 974                if (!err) {
 975retry:
 976                        /* Wait for a SCIF_REGISTER_(N)ACK message */
 977                        err = wait_event_timeout(window->regwq,
 978                                                 window->reg_state !=
 979                                                 OP_IN_PROGRESS,
 980                                                 SCIF_NODE_ALIVE_TIMEOUT);
 981                        if (!err && scifdev_alive(ep))
 982                                goto retry;
 983                        err = !err ? -ENODEV : 0;
 984                        if (window->reg_state == OP_FAILED)
 985                                err = -ENOTCONN;
 986                }
 987        } else {
 988                msg.uop = SCIF_FREE_VIRT;
 989                msg.payload[3] = SCIF_REGISTER;
 990                err = _scif_nodeqp_send(ep->remote_dev, &msg);
 991                spin_unlock(&ep->lock);
 992                if (!err)
 993                        err = -ENOTCONN;
 994        }
 995        return err;
 996}
 997
 998/**
 999 * scif_get_window_offset:
1000 * @ep: end point descriptor
1001 * @flags: flags
1002 * @offset: offset hint
1003 * @num_pages: number of pages
1004 * @out_offset: computed offset returned by reference.
1005 *
1006 * Compute/Claim a new offset for this EP.
1007 */
1008int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
1009                           int num_pages, s64 *out_offset)
1010{
1011        s64 page_index;
1012        struct iova *iova_ptr;
1013        int err = 0;
1014
1015        if (flags & SCIF_MAP_FIXED) {
1016                page_index = SCIF_IOVA_PFN(offset);
1017                iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
1018                                        page_index + num_pages - 1);
1019                if (!iova_ptr)
1020                        err = -EADDRINUSE;
1021        } else {
1022                iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
1023                                      SCIF_DMA_63BIT_PFN - 1, 0);
1024                if (!iova_ptr)
1025                        err = -ENOMEM;
1026        }
1027        if (!err)
1028                *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
1029        return err;
1030}
1031
1032/**
1033 * scif_free_window_offset:
1034 * @ep: end point descriptor
1035 * @window: registration window
1036 * @offset: Offset to be freed
1037 *
1038 * Free offset for this EP. The callee is supposed to grab
1039 * the RMA mutex before calling this API.
1040 */
1041void scif_free_window_offset(struct scif_endpt *ep,
1042                             struct scif_window *window, s64 offset)
1043{
1044        if ((window && !window->offset_freed) || !window) {
1045                free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
1046                if (window)
1047                        window->offset_freed = true;
1048        }
1049}
1050
1051/**
1052 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
1053 * @msg:        Interrupt message
1054 *
1055 * Remote side is requesting a memory allocation.
1056 */
1057void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
1058{
1059        int err;
1060        struct scif_window *window = NULL;
1061        int nr_pages = msg->payload[1];
1062
1063        window = scif_create_remote_window(scifdev, nr_pages);
1064        if (!window) {
1065                err = -ENOMEM;
1066                goto error;
1067        }
1068
1069        /* The peer's allocation request is granted */
1070        msg->uop = SCIF_ALLOC_GNT;
1071        msg->payload[0] = (u64)window;
1072        msg->payload[1] = window->mapped_offset;
1073        err = scif_nodeqp_send(scifdev, msg);
1074        if (err)
1075                scif_destroy_remote_window(window);
1076        return;
1077error:
1078        /* The peer's allocation request is rejected */
1079        dev_err(&scifdev->sdev->dev,
1080                "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
1081                __func__, __LINE__, err, window, nr_pages);
1082        msg->uop = SCIF_ALLOC_REJ;
1083        scif_nodeqp_send(scifdev, msg);
1084}
1085
1086/**
1087 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
1088 * @msg:        Interrupt message
1089 *
1090 * Remote side responded to a memory allocation.
1091 */
1092void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
1093{
1094        struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
1095        struct scif_window *window = container_of(handle, struct scif_window,
1096                                                  alloc_handle);
1097        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1098
1099        mutex_lock(&ep->rma_info.rma_lock);
1100        handle->vaddr = msg->payload[0];
1101        handle->phys_addr = msg->payload[1];
1102        if (msg->uop == SCIF_ALLOC_GNT)
1103                handle->state = OP_COMPLETED;
1104        else
1105                handle->state = OP_FAILED;
1106        wake_up(&handle->allocwq);
1107        mutex_unlock(&ep->rma_info.rma_lock);
1108}
1109
1110/**
1111 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
1112 * @msg:        Interrupt message
1113 *
1114 * Free up memory kmalloc'd earlier.
1115 */
1116void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
1117{
1118        struct scif_window *window = (struct scif_window *)msg->payload[1];
1119
1120        scif_destroy_remote_window(window);
1121}
1122
1123static void
1124scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
1125{
1126        int j;
1127        struct scif_hw_dev *sdev = dev->sdev;
1128        phys_addr_t apt_base = 0;
1129
1130        /*
1131         * Add the aperture base if the DMA address is not card relative
1132         * since the DMA addresses need to be an offset into the bar
1133         */
1134        if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
1135            sdev->aper && !sdev->card_rel_da)
1136                apt_base = sdev->aper->pa;
1137        else
1138                return;
1139
1140        for (j = 0; j < window->nr_contig_chunks; j++) {
1141                if (window->num_pages[j])
1142                        window->dma_addr[j] += apt_base;
1143                else
1144                        break;
1145        }
1146}
1147
1148/**
1149 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
1150 * @msg:        Interrupt message
1151 *
1152 * Update remote window list with a new registered window.
1153 */
1154void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
1155{
1156        struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
1157        struct scif_window *window =
1158                (struct scif_window *)msg->payload[1];
1159
1160        mutex_lock(&ep->rma_info.rma_lock);
1161        spin_lock(&ep->lock);
1162        if (ep->state == SCIFEP_CONNECTED) {
1163                msg->uop = SCIF_REGISTER_ACK;
1164                scif_nodeqp_send(ep->remote_dev, msg);
1165                scif_fixup_aper_base(ep->remote_dev, window);
1166                /* No further failures expected. Insert new window */
1167                scif_insert_window(window, &ep->rma_info.remote_reg_list);
1168        } else {
1169                msg->uop = SCIF_REGISTER_NACK;
1170                scif_nodeqp_send(ep->remote_dev, msg);
1171        }
1172        spin_unlock(&ep->lock);
1173        mutex_unlock(&ep->rma_info.rma_lock);
1174        /* free up any lookup resources now that page lists are transferred */
1175        scif_destroy_remote_lookup(ep->remote_dev, window);
1176        /*
1177         * We could not insert the window but we need to
1178         * destroy the window.
1179         */
1180        if (msg->uop == SCIF_REGISTER_NACK)
1181                scif_destroy_remote_window(window);
1182}
1183
1184/**
1185 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
1186 * @msg:        Interrupt message
1187 *
1188 * Remove window from remote registration list;
1189 */
1190void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
1191{
1192        struct scif_rma_req req;
1193        struct scif_window *window = NULL;
1194        struct scif_window *recv_window =
1195                (struct scif_window *)msg->payload[0];
1196        struct scif_endpt *ep;
1197        int del_window = 0;
1198
1199        ep = (struct scif_endpt *)recv_window->ep;
1200        req.out_window = &window;
1201        req.offset = recv_window->offset;
1202        req.prot = 0;
1203        req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
1204        req.type = SCIF_WINDOW_FULL;
1205        req.head = &ep->rma_info.remote_reg_list;
1206        msg->payload[0] = ep->remote_ep;
1207
1208        mutex_lock(&ep->rma_info.rma_lock);
1209        /* Does a valid window exist? */
1210        if (scif_query_window(&req)) {
1211                dev_err(&scifdev->sdev->dev,
1212                        "%s %d -ENXIO\n", __func__, __LINE__);
1213                msg->uop = SCIF_UNREGISTER_ACK;
1214                goto error;
1215        }
1216        if (window) {
1217                if (window->ref_count)
1218                        scif_put_window(window, window->nr_pages);
1219                else
1220                        dev_err(&scifdev->sdev->dev,
1221                                "%s %d ref count should be +ve\n",
1222                                __func__, __LINE__);
1223                window->unreg_state = OP_COMPLETED;
1224                if (!window->ref_count) {
1225                        msg->uop = SCIF_UNREGISTER_ACK;
1226                        atomic_inc(&ep->rma_info.tw_refcount);
1227                        ep->rma_info.async_list_del = 1;
1228                        list_del_init(&window->list);
1229                        del_window = 1;
1230                } else {
1231                        /* NACK! There are valid references to this window */
1232                        msg->uop = SCIF_UNREGISTER_NACK;
1233                }
1234        } else {
1235                /* The window did not make its way to the list at all. ACK */
1236                msg->uop = SCIF_UNREGISTER_ACK;
1237                scif_destroy_remote_window(recv_window);
1238        }
1239error:
1240        mutex_unlock(&ep->rma_info.rma_lock);
1241        if (del_window)
1242                scif_drain_dma_intr(ep->remote_dev->sdev,
1243                                    ep->rma_info.dma_chan);
1244        scif_nodeqp_send(ep->remote_dev, msg);
1245        if (del_window)
1246                scif_queue_for_cleanup(window, &scif_info.rma);
1247}
1248
1249/**
1250 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
1251 * @msg:        Interrupt message
1252 *
1253 * Wake up the window waiting to complete registration.
1254 */
1255void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
1256{
1257        struct scif_window *window =
1258                (struct scif_window *)msg->payload[2];
1259        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1260
1261        mutex_lock(&ep->rma_info.rma_lock);
1262        window->reg_state = OP_COMPLETED;
1263        wake_up(&window->regwq);
1264        mutex_unlock(&ep->rma_info.rma_lock);
1265}
1266
1267/**
1268 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
1269 * @msg:        Interrupt message
1270 *
1271 * Wake up the window waiting to inform it that registration
1272 * cannot be completed.
1273 */
1274void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
1275{
1276        struct scif_window *window =
1277                (struct scif_window *)msg->payload[2];
1278        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1279
1280        mutex_lock(&ep->rma_info.rma_lock);
1281        window->reg_state = OP_FAILED;
1282        wake_up(&window->regwq);
1283        mutex_unlock(&ep->rma_info.rma_lock);
1284}
1285
1286/**
1287 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
1288 * @msg:        Interrupt message
1289 *
1290 * Wake up the window waiting to complete unregistration.
1291 */
1292void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
1293{
1294        struct scif_window *window =
1295                (struct scif_window *)msg->payload[1];
1296        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1297
1298        mutex_lock(&ep->rma_info.rma_lock);
1299        window->unreg_state = OP_COMPLETED;
1300        wake_up(&window->unregwq);
1301        mutex_unlock(&ep->rma_info.rma_lock);
1302}
1303
1304/**
1305 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
1306 * @msg:        Interrupt message
1307 *
1308 * Wake up the window waiting to inform it that unregistration
1309 * cannot be completed immediately.
1310 */
1311void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
1312{
1313        struct scif_window *window =
1314                (struct scif_window *)msg->payload[1];
1315        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1316
1317        mutex_lock(&ep->rma_info.rma_lock);
1318        window->unreg_state = OP_FAILED;
1319        wake_up(&window->unregwq);
1320        mutex_unlock(&ep->rma_info.rma_lock);
1321}
1322
1323int __scif_pin_pages(void *addr, size_t len, int *out_prot,
1324                     int map_flags, scif_pinned_pages_t *pages)
1325{
1326        struct scif_pinned_pages *pinned_pages;
1327        int nr_pages, err = 0, i;
1328        bool vmalloc_addr = false;
1329        bool try_upgrade = false;
1330        int prot = *out_prot;
1331        int ulimit = 0;
1332        struct mm_struct *mm = NULL;
1333
1334        /* Unsupported flags */
1335        if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
1336                return -EINVAL;
1337        ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
1338
1339        /* Unsupported protection requested */
1340        if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1341                return -EINVAL;
1342
1343        /* addr/len must be page aligned. len should be non zero */
1344        if (!len ||
1345            (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1346            (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1347                return -EINVAL;
1348
1349        might_sleep();
1350
1351        nr_pages = len >> PAGE_SHIFT;
1352
1353        /* Allocate a set of pinned pages */
1354        pinned_pages = scif_create_pinned_pages(nr_pages, prot);
1355        if (!pinned_pages)
1356                return -ENOMEM;
1357
1358        if (map_flags & SCIF_MAP_KERNEL) {
1359                if (is_vmalloc_addr(addr))
1360                        vmalloc_addr = true;
1361
1362                for (i = 0; i < nr_pages; i++) {
1363                        if (vmalloc_addr)
1364                                pinned_pages->pages[i] =
1365                                        vmalloc_to_page(addr + (i * PAGE_SIZE));
1366                        else
1367                                pinned_pages->pages[i] =
1368                                        virt_to_page(addr + (i * PAGE_SIZE));
1369                }
1370                pinned_pages->nr_pages = nr_pages;
1371                pinned_pages->map_flags = SCIF_MAP_KERNEL;
1372        } else {
1373                /*
1374                 * SCIF supports registration caching. If a registration has
1375                 * been requested with read only permissions, then we try
1376                 * to pin the pages with RW permissions so that a subsequent
1377                 * transfer with RW permission can hit the cache instead of
1378                 * invalidating it. If the upgrade fails with RW then we
1379                 * revert back to R permission and retry
1380                 */
1381                if (prot == SCIF_PROT_READ)
1382                        try_upgrade = true;
1383                prot |= SCIF_PROT_WRITE;
1384retry:
1385                mm = current->mm;
1386                down_write(&mm->mmap_sem);
1387                if (ulimit) {
1388                        err = __scif_check_inc_pinned_vm(mm, nr_pages);
1389                        if (err) {
1390                                up_write(&mm->mmap_sem);
1391                                pinned_pages->nr_pages = 0;
1392                                goto error_unmap;
1393                        }
1394                }
1395
1396                pinned_pages->nr_pages = get_user_pages(
1397                                (u64)addr,
1398                                nr_pages,
1399                                !!(prot & SCIF_PROT_WRITE),
1400                                0,
1401                                pinned_pages->pages,
1402                                NULL);
1403                up_write(&mm->mmap_sem);
1404                if (nr_pages != pinned_pages->nr_pages) {
1405                        if (try_upgrade) {
1406                                if (ulimit)
1407                                        __scif_dec_pinned_vm_lock(mm,
1408                                                                  nr_pages, 0);
1409                                /* Roll back any pinned pages */
1410                                for (i = 0; i < pinned_pages->nr_pages; i++) {
1411                                        if (pinned_pages->pages[i])
1412                                                put_page(
1413                                                pinned_pages->pages[i]);
1414                                }
1415                                prot &= ~SCIF_PROT_WRITE;
1416                                try_upgrade = false;
1417                                goto retry;
1418                        }
1419                }
1420                pinned_pages->map_flags = 0;
1421        }
1422
1423        if (pinned_pages->nr_pages < nr_pages) {
1424                err = -EFAULT;
1425                pinned_pages->nr_pages = nr_pages;
1426                goto dec_pinned;
1427        }
1428
1429        *out_prot = prot;
1430        atomic_set(&pinned_pages->ref_count, 1);
1431        *pages = pinned_pages;
1432        return err;
1433dec_pinned:
1434        if (ulimit)
1435                __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
1436        /* Something went wrong! Rollback */
1437error_unmap:
1438        pinned_pages->nr_pages = nr_pages;
1439        scif_destroy_pinned_pages(pinned_pages);
1440        *pages = NULL;
1441        dev_dbg(scif_info.mdev.this_device,
1442                "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
1443        return err;
1444}
1445
1446int scif_pin_pages(void *addr, size_t len, int prot,
1447                   int map_flags, scif_pinned_pages_t *pages)
1448{
1449        return __scif_pin_pages(addr, len, &prot, map_flags, pages);
1450}
1451EXPORT_SYMBOL_GPL(scif_pin_pages);
1452
1453int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
1454{
1455        int err = 0, ret;
1456
1457        if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
1458                return -EINVAL;
1459
1460        ret = atomic_sub_return(1, &pinned_pages->ref_count);
1461        if (ret < 0) {
1462                dev_err(scif_info.mdev.this_device,
1463                        "%s %d scif_unpin_pages called without pinning? rc %d\n",
1464                        __func__, __LINE__, ret);
1465                return -EINVAL;
1466        }
1467        /*
1468         * Destroy the window if the ref count for this set of pinned
1469         * pages has dropped to zero. If it is positive then there is
1470         * a valid registered window which is backed by these pages and
1471         * it will be destroyed once all such windows are unregistered.
1472         */
1473        if (!ret)
1474                err = scif_destroy_pinned_pages(pinned_pages);
1475
1476        return err;
1477}
1478EXPORT_SYMBOL_GPL(scif_unpin_pages);
1479
1480static inline void
1481scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
1482{
1483        mutex_lock(&ep->rma_info.rma_lock);
1484        scif_insert_window(window, &ep->rma_info.reg_list);
1485        mutex_unlock(&ep->rma_info.rma_lock);
1486}
1487
1488off_t scif_register_pinned_pages(scif_epd_t epd,
1489                                 scif_pinned_pages_t pinned_pages,
1490                                 off_t offset, int map_flags)
1491{
1492        struct scif_endpt *ep = (struct scif_endpt *)epd;
1493        s64 computed_offset;
1494        struct scif_window *window;
1495        int err;
1496        size_t len;
1497        struct device *spdev;
1498
1499        /* Unsupported flags */
1500        if (map_flags & ~SCIF_MAP_FIXED)
1501                return -EINVAL;
1502
1503        len = pinned_pages->nr_pages << PAGE_SHIFT;
1504
1505        /*
1506         * Offset is not page aligned/negative or offset+len
1507         * wraps around with SCIF_MAP_FIXED.
1508         */
1509        if ((map_flags & SCIF_MAP_FIXED) &&
1510            ((ALIGN(offset, PAGE_SIZE) != offset) ||
1511            (offset < 0) ||
1512            (len > LONG_MAX - offset)))
1513                return -EINVAL;
1514
1515        might_sleep();
1516
1517        err = scif_verify_epd(ep);
1518        if (err)
1519                return err;
1520        /*
1521         * It is an error to pass pinned_pages to scif_register_pinned_pages()
1522         * after calling scif_unpin_pages().
1523         */
1524        if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
1525                return -EINVAL;
1526
1527        /* Compute the offset for this registration */
1528        err = scif_get_window_offset(ep, map_flags, offset,
1529                                     len, &computed_offset);
1530        if (err) {
1531                atomic_sub(1, &pinned_pages->ref_count);
1532                return err;
1533        }
1534
1535        /* Allocate and prepare self registration window */
1536        window = scif_create_window(ep, pinned_pages->nr_pages,
1537                                    computed_offset, false);
1538        if (!window) {
1539                atomic_sub(1, &pinned_pages->ref_count);
1540                scif_free_window_offset(ep, NULL, computed_offset);
1541                return -ENOMEM;
1542        }
1543
1544        window->pinned_pages = pinned_pages;
1545        window->nr_pages = pinned_pages->nr_pages;
1546        window->prot = pinned_pages->prot;
1547
1548        spdev = scif_get_peer_dev(ep->remote_dev);
1549        if (IS_ERR(spdev)) {
1550                err = PTR_ERR(spdev);
1551                scif_destroy_window(ep, window);
1552                return err;
1553        }
1554        err = scif_send_alloc_request(ep, window);
1555        if (err) {
1556                dev_err(&ep->remote_dev->sdev->dev,
1557                        "%s %d err %d\n", __func__, __LINE__, err);
1558                goto error_unmap;
1559        }
1560
1561        /* Prepare the remote registration window */
1562        err = scif_prep_remote_window(ep, window);
1563        if (err) {
1564                dev_err(&ep->remote_dev->sdev->dev,
1565                        "%s %d err %d\n", __func__, __LINE__, err);
1566                goto error_unmap;
1567        }
1568
1569        /* Tell the peer about the new window */
1570        err = scif_send_scif_register(ep, window);
1571        if (err) {
1572                dev_err(&ep->remote_dev->sdev->dev,
1573                        "%s %d err %d\n", __func__, __LINE__, err);
1574                goto error_unmap;
1575        }
1576
1577        scif_put_peer_dev(spdev);
1578        /* No further failures expected. Insert new window */
1579        scif_insert_local_window(window, ep);
1580        return computed_offset;
1581error_unmap:
1582        scif_destroy_window(ep, window);
1583        scif_put_peer_dev(spdev);
1584        dev_err(&ep->remote_dev->sdev->dev,
1585                "%s %d err %d\n", __func__, __LINE__, err);
1586        return err;
1587}
1588EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
1589
1590off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
1591                    int prot, int map_flags)
1592{
1593        scif_pinned_pages_t pinned_pages;
1594        off_t err;
1595        struct scif_endpt *ep = (struct scif_endpt *)epd;
1596        s64 computed_offset;
1597        struct scif_window *window;
1598        struct mm_struct *mm = NULL;
1599        struct device *spdev;
1600
1601        dev_dbg(scif_info.mdev.this_device,
1602                "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
1603                epd, addr, len, offset, prot, map_flags);
1604        /* Unsupported flags */
1605        if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
1606                return -EINVAL;
1607
1608        /*
1609         * Offset is not page aligned/negative or offset+len
1610         * wraps around with SCIF_MAP_FIXED.
1611         */
1612        if ((map_flags & SCIF_MAP_FIXED) &&
1613            ((ALIGN(offset, PAGE_SIZE) != offset) ||
1614            (offset < 0) ||
1615            (len > LONG_MAX - offset)))
1616                return -EINVAL;
1617
1618        /* Unsupported protection requested */
1619        if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1620                return -EINVAL;
1621
1622        /* addr/len must be page aligned. len should be non zero */
1623        if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1624            (ALIGN(len, PAGE_SIZE) != len))
1625                return -EINVAL;
1626
1627        might_sleep();
1628
1629        err = scif_verify_epd(ep);
1630        if (err)
1631                return err;
1632
1633        /* Compute the offset for this registration */
1634        err = scif_get_window_offset(ep, map_flags, offset,
1635                                     len >> PAGE_SHIFT, &computed_offset);
1636        if (err)
1637                return err;
1638
1639        spdev = scif_get_peer_dev(ep->remote_dev);
1640        if (IS_ERR(spdev)) {
1641                err = PTR_ERR(spdev);
1642                scif_free_window_offset(ep, NULL, computed_offset);
1643                return err;
1644        }
1645        /* Allocate and prepare self registration window */
1646        window = scif_create_window(ep, len >> PAGE_SHIFT,
1647                                    computed_offset, false);
1648        if (!window) {
1649                scif_free_window_offset(ep, NULL, computed_offset);
1650                scif_put_peer_dev(spdev);
1651                return -ENOMEM;
1652        }
1653
1654        window->nr_pages = len >> PAGE_SHIFT;
1655
1656        err = scif_send_alloc_request(ep, window);
1657        if (err) {
1658                scif_destroy_incomplete_window(ep, window);
1659                scif_put_peer_dev(spdev);
1660                return err;
1661        }
1662
1663        if (!(map_flags & SCIF_MAP_KERNEL)) {
1664                mm = __scif_acquire_mm();
1665                map_flags |= SCIF_MAP_ULIMIT;
1666        }
1667        /* Pin down the pages */
1668        err = __scif_pin_pages(addr, len, &prot,
1669                               map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
1670                               &pinned_pages);
1671        if (err) {
1672                scif_destroy_incomplete_window(ep, window);
1673                __scif_release_mm(mm);
1674                goto error;
1675        }
1676
1677        window->pinned_pages = pinned_pages;
1678        window->prot = pinned_pages->prot;
1679        window->mm = mm;
1680
1681        /* Prepare the remote registration window */
1682        err = scif_prep_remote_window(ep, window);
1683        if (err) {
1684                dev_err(&ep->remote_dev->sdev->dev,
1685                        "%s %d err %ld\n", __func__, __LINE__, err);
1686                goto error_unmap;
1687        }
1688
1689        /* Tell the peer about the new window */
1690        err = scif_send_scif_register(ep, window);
1691        if (err) {
1692                dev_err(&ep->remote_dev->sdev->dev,
1693                        "%s %d err %ld\n", __func__, __LINE__, err);
1694                goto error_unmap;
1695        }
1696
1697        scif_put_peer_dev(spdev);
1698        /* No further failures expected. Insert new window */
1699        scif_insert_local_window(window, ep);
1700        dev_dbg(&ep->remote_dev->sdev->dev,
1701                "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
1702                epd, addr, len, computed_offset);
1703        return computed_offset;
1704error_unmap:
1705        scif_destroy_window(ep, window);
1706error:
1707        scif_put_peer_dev(spdev);
1708        dev_err(&ep->remote_dev->sdev->dev,
1709                "%s %d err %ld\n", __func__, __LINE__, err);
1710        return err;
1711}
1712EXPORT_SYMBOL_GPL(scif_register);
1713
1714int
1715scif_unregister(scif_epd_t epd, off_t offset, size_t len)
1716{
1717        struct scif_endpt *ep = (struct scif_endpt *)epd;
1718        struct scif_window *window = NULL;
1719        struct scif_rma_req req;
1720        int nr_pages, err;
1721        struct device *spdev;
1722
1723        dev_dbg(scif_info.mdev.this_device,
1724                "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
1725                ep, offset, len);
1726        /* len must be page aligned. len should be non zero */
1727        if (!len ||
1728            (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1729                return -EINVAL;
1730
1731        /* Offset is not page aligned or offset+len wraps around */
1732        if ((ALIGN(offset, PAGE_SIZE) != offset) ||
1733            (offset < 0) ||
1734            (len > LONG_MAX - offset))
1735                return -EINVAL;
1736
1737        err = scif_verify_epd(ep);
1738        if (err)
1739                return err;
1740
1741        might_sleep();
1742        nr_pages = len >> PAGE_SHIFT;
1743
1744        req.out_window = &window;
1745        req.offset = offset;
1746        req.prot = 0;
1747        req.nr_bytes = len;
1748        req.type = SCIF_WINDOW_FULL;
1749        req.head = &ep->rma_info.reg_list;
1750
1751        spdev = scif_get_peer_dev(ep->remote_dev);
1752        if (IS_ERR(spdev)) {
1753                err = PTR_ERR(spdev);
1754                return err;
1755        }
1756        mutex_lock(&ep->rma_info.rma_lock);
1757        /* Does a valid window exist? */
1758        err = scif_query_window(&req);
1759        if (err) {
1760                dev_err(&ep->remote_dev->sdev->dev,
1761                        "%s %d err %d\n", __func__, __LINE__, err);
1762                goto error;
1763        }
1764        /* Unregister all the windows in this range */
1765        err = scif_rma_list_unregister(window, offset, nr_pages);
1766        if (err)
1767                dev_err(&ep->remote_dev->sdev->dev,
1768                        "%s %d err %d\n", __func__, __LINE__, err);
1769error:
1770        mutex_unlock(&ep->rma_info.rma_lock);
1771        scif_put_peer_dev(spdev);
1772        return err;
1773}
1774EXPORT_SYMBOL_GPL(scif_unregister);
1775