linux/drivers/misc/mic/scif/scif_rma.c
<<
>>
Prefs
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * Copyright(c) 2015 Intel Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License, version 2, as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13 * General Public License for more details.
  14 *
  15 * Intel SCIF driver.
  16 *
  17 */
  18#include <linux/dma_remapping.h>
  19#include <linux/pagemap.h>
  20#include <linux/sched/mm.h>
  21#include <linux/sched/signal.h>
  22
  23#include "scif_main.h"
  24#include "scif_map.h"
  25
  26/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
  27#define SCIF_MAP_ULIMIT 0x40
  28
  29bool scif_ulimit_check = 1;
  30
  31/**
  32 * scif_rma_ep_init:
  33 * @ep: end point
  34 *
  35 * Initialize RMA per EP data structures.
  36 */
  37void scif_rma_ep_init(struct scif_endpt *ep)
  38{
  39        struct scif_endpt_rma_info *rma = &ep->rma_info;
  40
  41        mutex_init(&rma->rma_lock);
  42        init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
  43                         SCIF_DMA_64BIT_PFN);
  44        spin_lock_init(&rma->tc_lock);
  45        mutex_init(&rma->mmn_lock);
  46        INIT_LIST_HEAD(&rma->reg_list);
  47        INIT_LIST_HEAD(&rma->remote_reg_list);
  48        atomic_set(&rma->tw_refcount, 0);
  49        atomic_set(&rma->tcw_refcount, 0);
  50        atomic_set(&rma->tcw_total_pages, 0);
  51        atomic_set(&rma->fence_refcount, 0);
  52
  53        rma->async_list_del = 0;
  54        rma->dma_chan = NULL;
  55        INIT_LIST_HEAD(&rma->mmn_list);
  56        INIT_LIST_HEAD(&rma->vma_list);
  57        init_waitqueue_head(&rma->markwq);
  58}
  59
  60/**
  61 * scif_rma_ep_can_uninit:
  62 * @ep: end point
  63 *
  64 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
  65 */
  66int scif_rma_ep_can_uninit(struct scif_endpt *ep)
  67{
  68        int ret = 0;
  69
  70        mutex_lock(&ep->rma_info.rma_lock);
  71        /* Destroy RMA Info only if both lists are empty */
  72        if (list_empty(&ep->rma_info.reg_list) &&
  73            list_empty(&ep->rma_info.remote_reg_list) &&
  74            list_empty(&ep->rma_info.mmn_list) &&
  75            !atomic_read(&ep->rma_info.tw_refcount) &&
  76            !atomic_read(&ep->rma_info.tcw_refcount) &&
  77            !atomic_read(&ep->rma_info.fence_refcount))
  78                ret = 1;
  79        mutex_unlock(&ep->rma_info.rma_lock);
  80        return ret;
  81}
  82
  83/**
  84 * scif_create_pinned_pages:
  85 * @nr_pages: number of pages in window
  86 * @prot: read/write protection
  87 *
  88 * Allocate and prepare a set of pinned pages.
  89 */
  90static struct scif_pinned_pages *
  91scif_create_pinned_pages(int nr_pages, int prot)
  92{
  93        struct scif_pinned_pages *pin;
  94
  95        might_sleep();
  96        pin = scif_zalloc(sizeof(*pin));
  97        if (!pin)
  98                goto error;
  99
 100        pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
 101        if (!pin->pages)
 102                goto error_free_pinned_pages;
 103
 104        pin->prot = prot;
 105        pin->magic = SCIFEP_MAGIC;
 106        return pin;
 107
 108error_free_pinned_pages:
 109        scif_free(pin, sizeof(*pin));
 110error:
 111        return NULL;
 112}
 113
 114/**
 115 * scif_destroy_pinned_pages:
 116 * @pin: A set of pinned pages.
 117 *
 118 * Deallocate resources for pinned pages.
 119 */
 120static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
 121{
 122        int j;
 123        int writeable = pin->prot & SCIF_PROT_WRITE;
 124        int kernel = SCIF_MAP_KERNEL & pin->map_flags;
 125
 126        for (j = 0; j < pin->nr_pages; j++) {
 127                if (pin->pages[j] && !kernel) {
 128                        if (writeable)
 129                                SetPageDirty(pin->pages[j]);
 130                        put_page(pin->pages[j]);
 131                }
 132        }
 133
 134        scif_free(pin->pages,
 135                  pin->nr_pages * sizeof(*pin->pages));
 136        scif_free(pin, sizeof(*pin));
 137        return 0;
 138}
 139
 140/*
 141 * scif_create_window:
 142 * @ep: end point
 143 * @nr_pages: number of pages
 144 * @offset: registration offset
 145 * @temp: true if a temporary window is being created
 146 *
 147 * Allocate and prepare a self registration window.
 148 */
 149struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
 150                                       s64 offset, bool temp)
 151{
 152        struct scif_window *window;
 153
 154        might_sleep();
 155        window = scif_zalloc(sizeof(*window));
 156        if (!window)
 157                goto error;
 158
 159        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
 160        if (!window->dma_addr)
 161                goto error_free_window;
 162
 163        window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
 164        if (!window->num_pages)
 165                goto error_free_window;
 166
 167        window->offset = offset;
 168        window->ep = (u64)ep;
 169        window->magic = SCIFEP_MAGIC;
 170        window->reg_state = OP_IDLE;
 171        init_waitqueue_head(&window->regwq);
 172        window->unreg_state = OP_IDLE;
 173        init_waitqueue_head(&window->unregwq);
 174        INIT_LIST_HEAD(&window->list);
 175        window->type = SCIF_WINDOW_SELF;
 176        window->temp = temp;
 177        return window;
 178
 179error_free_window:
 180        scif_free(window->dma_addr,
 181                  nr_pages * sizeof(*window->dma_addr));
 182        scif_free(window, sizeof(*window));
 183error:
 184        return NULL;
 185}
 186
 187/**
 188 * scif_destroy_incomplete_window:
 189 * @ep: end point
 190 * @window: registration window
 191 *
 192 * Deallocate resources for self window.
 193 */
 194static void scif_destroy_incomplete_window(struct scif_endpt *ep,
 195                                           struct scif_window *window)
 196{
 197        int err;
 198        int nr_pages = window->nr_pages;
 199        struct scif_allocmsg *alloc = &window->alloc_handle;
 200        struct scifmsg msg;
 201
 202retry:
 203        /* Wait for a SCIF_ALLOC_GNT/REJ message */
 204        err = wait_event_timeout(alloc->allocwq,
 205                                 alloc->state != OP_IN_PROGRESS,
 206                                 SCIF_NODE_ALIVE_TIMEOUT);
 207        if (!err && scifdev_alive(ep))
 208                goto retry;
 209
 210        mutex_lock(&ep->rma_info.rma_lock);
 211        if (alloc->state == OP_COMPLETED) {
 212                msg.uop = SCIF_FREE_VIRT;
 213                msg.src = ep->port;
 214                msg.payload[0] = ep->remote_ep;
 215                msg.payload[1] = window->alloc_handle.vaddr;
 216                msg.payload[2] = (u64)window;
 217                msg.payload[3] = SCIF_REGISTER;
 218                _scif_nodeqp_send(ep->remote_dev, &msg);
 219        }
 220        mutex_unlock(&ep->rma_info.rma_lock);
 221
 222        scif_free_window_offset(ep, window, window->offset);
 223        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
 224        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
 225        scif_free(window, sizeof(*window));
 226}
 227
 228/**
 229 * scif_unmap_window:
 230 * @remote_dev: SCIF remote device
 231 * @window: registration window
 232 *
 233 * Delete any DMA mappings created for a registered self window
 234 */
 235void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
 236{
 237        int j;
 238
 239        if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
 240                if (window->st) {
 241                        dma_unmap_sg(&remote_dev->sdev->dev,
 242                                     window->st->sgl, window->st->nents,
 243                                     DMA_BIDIRECTIONAL);
 244                        sg_free_table(window->st);
 245                        kfree(window->st);
 246                        window->st = NULL;
 247                }
 248        } else {
 249                for (j = 0; j < window->nr_contig_chunks; j++) {
 250                        if (window->dma_addr[j]) {
 251                                scif_unmap_single(window->dma_addr[j],
 252                                                  remote_dev,
 253                                                  window->num_pages[j] <<
 254                                                  PAGE_SHIFT);
 255                                window->dma_addr[j] = 0x0;
 256                        }
 257                }
 258        }
 259}
 260
 261static inline struct mm_struct *__scif_acquire_mm(void)
 262{
 263        if (scif_ulimit_check)
 264                return get_task_mm(current);
 265        return NULL;
 266}
 267
 268static inline void __scif_release_mm(struct mm_struct *mm)
 269{
 270        if (mm)
 271                mmput(mm);
 272}
 273
 274static inline int
 275__scif_dec_pinned_vm_lock(struct mm_struct *mm,
 276                          int nr_pages, bool try_lock)
 277{
 278        if (!mm || !nr_pages || !scif_ulimit_check)
 279                return 0;
 280        if (try_lock) {
 281                if (!down_write_trylock(&mm->mmap_sem)) {
 282                        dev_err(scif_info.mdev.this_device,
 283                                "%s %d err\n", __func__, __LINE__);
 284                        return -1;
 285                }
 286        } else {
 287                down_write(&mm->mmap_sem);
 288        }
 289        mm->pinned_vm -= nr_pages;
 290        up_write(&mm->mmap_sem);
 291        return 0;
 292}
 293
 294static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 295                                             int nr_pages)
 296{
 297        unsigned long locked, lock_limit;
 298
 299        if (!mm || !nr_pages || !scif_ulimit_check)
 300                return 0;
 301
 302        locked = nr_pages;
 303        locked += mm->pinned_vm;
 304        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 305        if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 306                dev_err(scif_info.mdev.this_device,
 307                        "locked(%lu) > lock_limit(%lu)\n",
 308                        locked, lock_limit);
 309                return -ENOMEM;
 310        }
 311        mm->pinned_vm = locked;
 312        return 0;
 313}
 314
 315/**
 316 * scif_destroy_window:
 317 * @ep: end point
 318 * @window: registration window
 319 *
 320 * Deallocate resources for self window.
 321 */
 322int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
 323{
 324        int j;
 325        struct scif_pinned_pages *pinned_pages = window->pinned_pages;
 326        int nr_pages = window->nr_pages;
 327
 328        might_sleep();
 329        if (!window->temp && window->mm) {
 330                __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
 331                __scif_release_mm(window->mm);
 332                window->mm = NULL;
 333        }
 334
 335        scif_free_window_offset(ep, window, window->offset);
 336        scif_unmap_window(ep->remote_dev, window);
 337        /*
 338         * Decrement references for this set of pinned pages from
 339         * this window.
 340         */
 341        j = atomic_sub_return(1, &pinned_pages->ref_count);
 342        if (j < 0)
 343                dev_err(scif_info.mdev.this_device,
 344                        "%s %d incorrect ref count %d\n",
 345                        __func__, __LINE__, j);
 346        /*
 347         * If the ref count for pinned_pages is zero then someone
 348         * has already called scif_unpin_pages() for it and we should
 349         * destroy the page cache.
 350         */
 351        if (!j)
 352                scif_destroy_pinned_pages(window->pinned_pages);
 353        scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
 354        scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
 355        window->magic = 0;
 356        scif_free(window, sizeof(*window));
 357        return 0;
 358}
 359
 360/**
 361 * scif_create_remote_lookup:
 362 * @remote_dev: SCIF remote device
 363 * @window: remote window
 364 *
 365 * Allocate and prepare lookup entries for the remote
 366 * end to copy over the physical addresses.
 367 * Returns 0 on success and appropriate errno on failure.
 368 */
 369static int scif_create_remote_lookup(struct scif_dev *remote_dev,
 370                                     struct scif_window *window)
 371{
 372        int i, j, err = 0;
 373        int nr_pages = window->nr_pages;
 374        bool vmalloc_dma_phys, vmalloc_num_pages;
 375
 376        might_sleep();
 377        /* Map window */
 378        err = scif_map_single(&window->mapped_offset,
 379                              window, remote_dev, sizeof(*window));
 380        if (err)
 381                goto error_window;
 382
 383        /* Compute the number of lookup entries. 21 == 2MB Shift */
 384        window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
 385                                        ((2) * 1024 * 1024)) >> 21;
 386
 387        window->dma_addr_lookup.lookup =
 388                scif_alloc_coherent(&window->dma_addr_lookup.offset,
 389                                    remote_dev, window->nr_lookup *
 390                                    sizeof(*window->dma_addr_lookup.lookup),
 391                                    GFP_KERNEL | __GFP_ZERO);
 392        if (!window->dma_addr_lookup.lookup) {
 393                err = -ENOMEM;
 394                goto error_window;
 395        }
 396
 397        window->num_pages_lookup.lookup =
 398                scif_alloc_coherent(&window->num_pages_lookup.offset,
 399                                    remote_dev, window->nr_lookup *
 400                                    sizeof(*window->num_pages_lookup.lookup),
 401                                    GFP_KERNEL | __GFP_ZERO);
 402        if (!window->num_pages_lookup.lookup) {
 403                err = -ENOMEM;
 404                goto error_window;
 405        }
 406
 407        vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
 408        vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
 409
 410        /* Now map each of the pages containing physical addresses */
 411        for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
 412                err = scif_map_page(&window->dma_addr_lookup.lookup[j],
 413                                    vmalloc_dma_phys ?
 414                                    vmalloc_to_page(&window->dma_addr[i]) :
 415                                    virt_to_page(&window->dma_addr[i]),
 416                                    remote_dev);
 417                if (err)
 418                        goto error_window;
 419                err = scif_map_page(&window->num_pages_lookup.lookup[j],
 420                                    vmalloc_dma_phys ?
 421                                    vmalloc_to_page(&window->num_pages[i]) :
 422                                    virt_to_page(&window->num_pages[i]),
 423                                    remote_dev);
 424                if (err)
 425                        goto error_window;
 426        }
 427        return 0;
 428error_window:
 429        return err;
 430}
 431
 432/**
 433 * scif_destroy_remote_lookup:
 434 * @remote_dev: SCIF remote device
 435 * @window: remote window
 436 *
 437 * Destroy lookup entries used for the remote
 438 * end to copy over the physical addresses.
 439 */
 440static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
 441                                       struct scif_window *window)
 442{
 443        int i, j;
 444
 445        if (window->nr_lookup) {
 446                struct scif_rma_lookup *lup = &window->dma_addr_lookup;
 447                struct scif_rma_lookup *npup = &window->num_pages_lookup;
 448
 449                for (i = 0, j = 0; i < window->nr_pages;
 450                        i += SCIF_NR_ADDR_IN_PAGE, j++) {
 451                        if (lup->lookup && lup->lookup[j])
 452                                scif_unmap_single(lup->lookup[j],
 453                                                  remote_dev,
 454                                                  PAGE_SIZE);
 455                        if (npup->lookup && npup->lookup[j])
 456                                scif_unmap_single(npup->lookup[j],
 457                                                  remote_dev,
 458                                                  PAGE_SIZE);
 459                }
 460                if (lup->lookup)
 461                        scif_free_coherent(lup->lookup, lup->offset,
 462                                           remote_dev, window->nr_lookup *
 463                                           sizeof(*lup->lookup));
 464                if (npup->lookup)
 465                        scif_free_coherent(npup->lookup, npup->offset,
 466                                           remote_dev, window->nr_lookup *
 467                                           sizeof(*npup->lookup));
 468                if (window->mapped_offset)
 469                        scif_unmap_single(window->mapped_offset,
 470                                          remote_dev, sizeof(*window));
 471                window->nr_lookup = 0;
 472        }
 473}
 474
 475/**
 476 * scif_create_remote_window:
 477 * @ep: end point
 478 * @nr_pages: number of pages in window
 479 *
 480 * Allocate and prepare a remote registration window.
 481 */
 482static struct scif_window *
 483scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
 484{
 485        struct scif_window *window;
 486
 487        might_sleep();
 488        window = scif_zalloc(sizeof(*window));
 489        if (!window)
 490                goto error_ret;
 491
 492        window->magic = SCIFEP_MAGIC;
 493        window->nr_pages = nr_pages;
 494
 495        window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
 496        if (!window->dma_addr)
 497                goto error_window;
 498
 499        window->num_pages = scif_zalloc(nr_pages *
 500                                        sizeof(*window->num_pages));
 501        if (!window->num_pages)
 502                goto error_window;
 503
 504        if (scif_create_remote_lookup(scifdev, window))
 505                goto error_window;
 506
 507        window->type = SCIF_WINDOW_PEER;
 508        window->unreg_state = OP_IDLE;
 509        INIT_LIST_HEAD(&window->list);
 510        return window;
 511error_window:
 512        scif_destroy_remote_window(window);
 513error_ret:
 514        return NULL;
 515}
 516
 517/**
 518 * scif_destroy_remote_window:
 519 * @ep: end point
 520 * @window: remote registration window
 521 *
 522 * Deallocate resources for remote window.
 523 */
 524void
 525scif_destroy_remote_window(struct scif_window *window)
 526{
 527        scif_free(window->dma_addr, window->nr_pages *
 528                  sizeof(*window->dma_addr));
 529        scif_free(window->num_pages, window->nr_pages *
 530                  sizeof(*window->num_pages));
 531        window->magic = 0;
 532        scif_free(window, sizeof(*window));
 533}
 534
 535/**
 536 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 537 * @remote_dev: SCIF remote device
 538 * @window: remote registration window
 539 *
 540 * Map the physical pages using dma_map_sg(..) and then detect the number
 541 * of contiguous DMA mappings allocated
 542 */
 543static int scif_iommu_map(struct scif_dev *remote_dev,
 544                          struct scif_window *window)
 545{
 546        struct scatterlist *sg;
 547        int i, err;
 548        scif_pinned_pages_t pin = window->pinned_pages;
 549
 550        window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
 551        if (!window->st)
 552                return -ENOMEM;
 553
 554        err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
 555        if (err)
 556                return err;
 557
 558        for_each_sg(window->st->sgl, sg, window->st->nents, i)
 559                sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
 560
 561        err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
 562                         window->st->nents, DMA_BIDIRECTIONAL);
 563        if (!err)
 564                return -ENOMEM;
 565        /* Detect contiguous ranges of DMA mappings */
 566        sg = window->st->sgl;
 567        for (i = 0; sg; i++) {
 568                dma_addr_t last_da;
 569
 570                window->dma_addr[i] = sg_dma_address(sg);
 571                window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
 572                last_da = sg_dma_address(sg) + sg_dma_len(sg);
 573                while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
 574                        window->num_pages[i] +=
 575                                (sg_dma_len(sg) >> PAGE_SHIFT);
 576                        last_da = window->dma_addr[i] +
 577                                sg_dma_len(sg);
 578                }
 579                window->nr_contig_chunks++;
 580        }
 581        return 0;
 582}
 583
 584/**
 585 * scif_map_window:
 586 * @remote_dev: SCIF remote device
 587 * @window: self registration window
 588 *
 589 * Map pages of a window into the aperture/PCI.
 590 * Also determine addresses required for DMA.
 591 */
 592int
 593scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
 594{
 595        int i, j, k, err = 0, nr_contig_pages;
 596        scif_pinned_pages_t pin;
 597        phys_addr_t phys_prev, phys_curr;
 598
 599        might_sleep();
 600
 601        pin = window->pinned_pages;
 602
 603        if (intel_iommu_enabled && !scifdev_self(remote_dev))
 604                return scif_iommu_map(remote_dev, window);
 605
 606        for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
 607                phys_prev = page_to_phys(pin->pages[i]);
 608                nr_contig_pages = 1;
 609
 610                /* Detect physically contiguous chunks */
 611                for (k = i + 1; k < window->nr_pages; k++) {
 612                        phys_curr = page_to_phys(pin->pages[k]);
 613                        if (phys_curr != (phys_prev + PAGE_SIZE))
 614                                break;
 615                        phys_prev = phys_curr;
 616                        nr_contig_pages++;
 617                }
 618                window->num_pages[j] = nr_contig_pages;
 619                window->nr_contig_chunks++;
 620                if (scif_is_mgmt_node()) {
 621                        /*
 622                         * Management node has to deal with SMPT on X100 and
 623                         * hence the DMA mapping is required
 624                         */
 625                        err = scif_map_single(&window->dma_addr[j],
 626                                              phys_to_virt(page_to_phys(
 627                                                           pin->pages[i])),
 628                                              remote_dev,
 629                                              nr_contig_pages << PAGE_SHIFT);
 630                        if (err)
 631                                return err;
 632                } else {
 633                        window->dma_addr[j] = page_to_phys(pin->pages[i]);
 634                }
 635        }
 636        return err;
 637}
 638
 639/**
 640 * scif_send_scif_unregister:
 641 * @ep: end point
 642 * @window: self registration window
 643 *
 644 * Send a SCIF_UNREGISTER message.
 645 */
 646static int scif_send_scif_unregister(struct scif_endpt *ep,
 647                                     struct scif_window *window)
 648{
 649        struct scifmsg msg;
 650
 651        msg.uop = SCIF_UNREGISTER;
 652        msg.src = ep->port;
 653        msg.payload[0] = window->alloc_handle.vaddr;
 654        msg.payload[1] = (u64)window;
 655        return scif_nodeqp_send(ep->remote_dev, &msg);
 656}
 657
 658/**
 659 * scif_unregister_window:
 660 * @window: self registration window
 661 *
 662 * Send an unregistration request and wait for a response.
 663 */
 664int scif_unregister_window(struct scif_window *window)
 665{
 666        int err = 0;
 667        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
 668        bool send_msg = false;
 669
 670        might_sleep();
 671        switch (window->unreg_state) {
 672        case OP_IDLE:
 673        {
 674                window->unreg_state = OP_IN_PROGRESS;
 675                send_msg = true;
 676                /* fall through */
 677        }
 678        case OP_IN_PROGRESS:
 679        {
 680                scif_get_window(window, 1);
 681                mutex_unlock(&ep->rma_info.rma_lock);
 682                if (send_msg) {
 683                        err = scif_send_scif_unregister(ep, window);
 684                        if (err) {
 685                                window->unreg_state = OP_COMPLETED;
 686                                goto done;
 687                        }
 688                } else {
 689                        /* Return ENXIO since unregistration is in progress */
 690                        mutex_lock(&ep->rma_info.rma_lock);
 691                        return -ENXIO;
 692                }
 693retry:
 694                /* Wait for a SCIF_UNREGISTER_(N)ACK message */
 695                err = wait_event_timeout(window->unregwq,
 696                                         window->unreg_state != OP_IN_PROGRESS,
 697                                         SCIF_NODE_ALIVE_TIMEOUT);
 698                if (!err && scifdev_alive(ep))
 699                        goto retry;
 700                if (!err) {
 701                        err = -ENODEV;
 702                        window->unreg_state = OP_COMPLETED;
 703                        dev_err(scif_info.mdev.this_device,
 704                                "%s %d err %d\n", __func__, __LINE__, err);
 705                }
 706                if (err > 0)
 707                        err = 0;
 708done:
 709                mutex_lock(&ep->rma_info.rma_lock);
 710                scif_put_window(window, 1);
 711                break;
 712        }
 713        case OP_FAILED:
 714        {
 715                if (!scifdev_alive(ep)) {
 716                        err = -ENODEV;
 717                        window->unreg_state = OP_COMPLETED;
 718                }
 719                break;
 720        }
 721        case OP_COMPLETED:
 722                break;
 723        default:
 724                err = -ENODEV;
 725        }
 726
 727        if (window->unreg_state == OP_COMPLETED && window->ref_count)
 728                scif_put_window(window, window->nr_pages);
 729
 730        if (!window->ref_count) {
 731                atomic_inc(&ep->rma_info.tw_refcount);
 732                list_del_init(&window->list);
 733                scif_free_window_offset(ep, window, window->offset);
 734                mutex_unlock(&ep->rma_info.rma_lock);
 735                if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
 736                    scifdev_alive(ep)) {
 737                        scif_drain_dma_intr(ep->remote_dev->sdev,
 738                                            ep->rma_info.dma_chan);
 739                } else {
 740                        if (!__scif_dec_pinned_vm_lock(window->mm,
 741                                                       window->nr_pages, 1)) {
 742                                __scif_release_mm(window->mm);
 743                                window->mm = NULL;
 744                        }
 745                }
 746                scif_queue_for_cleanup(window, &scif_info.rma);
 747                mutex_lock(&ep->rma_info.rma_lock);
 748        }
 749        return err;
 750}
 751
 752/**
 753 * scif_send_alloc_request:
 754 * @ep: end point
 755 * @window: self registration window
 756 *
 757 * Send a remote window allocation request
 758 */
 759static int scif_send_alloc_request(struct scif_endpt *ep,
 760                                   struct scif_window *window)
 761{
 762        struct scifmsg msg;
 763        struct scif_allocmsg *alloc = &window->alloc_handle;
 764
 765        /* Set up the Alloc Handle */
 766        alloc->state = OP_IN_PROGRESS;
 767        init_waitqueue_head(&alloc->allocwq);
 768
 769        /* Send out an allocation request */
 770        msg.uop = SCIF_ALLOC_REQ;
 771        msg.payload[1] = window->nr_pages;
 772        msg.payload[2] = (u64)&window->alloc_handle;
 773        return _scif_nodeqp_send(ep->remote_dev, &msg);
 774}
 775
 776/**
 777 * scif_prep_remote_window:
 778 * @ep: end point
 779 * @window: self registration window
 780 *
 781 * Send a remote window allocation request, wait for an allocation response,
 782 * and prepares the remote window by copying over the page lists
 783 */
 784static int scif_prep_remote_window(struct scif_endpt *ep,
 785                                   struct scif_window *window)
 786{
 787        struct scifmsg msg;
 788        struct scif_window *remote_window;
 789        struct scif_allocmsg *alloc = &window->alloc_handle;
 790        dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
 791        int i = 0, j = 0;
 792        int nr_contig_chunks, loop_nr_contig_chunks;
 793        int remaining_nr_contig_chunks, nr_lookup;
 794        int err, map_err;
 795
 796        map_err = scif_map_window(ep->remote_dev, window);
 797        if (map_err)
 798                dev_err(&ep->remote_dev->sdev->dev,
 799                        "%s %d map_err %d\n", __func__, __LINE__, map_err);
 800        remaining_nr_contig_chunks = window->nr_contig_chunks;
 801        nr_contig_chunks = window->nr_contig_chunks;
 802retry:
 803        /* Wait for a SCIF_ALLOC_GNT/REJ message */
 804        err = wait_event_timeout(alloc->allocwq,
 805                                 alloc->state != OP_IN_PROGRESS,
 806                                 SCIF_NODE_ALIVE_TIMEOUT);
 807        mutex_lock(&ep->rma_info.rma_lock);
 808        /* Synchronize with the thread waking up allocwq */
 809        mutex_unlock(&ep->rma_info.rma_lock);
 810        if (!err && scifdev_alive(ep))
 811                goto retry;
 812
 813        if (!err)
 814                err = -ENODEV;
 815
 816        if (err > 0)
 817                err = 0;
 818        else
 819                return err;
 820
 821        /* Bail out. The remote end rejected this request */
 822        if (alloc->state == OP_FAILED)
 823                return -ENOMEM;
 824
 825        if (map_err) {
 826                dev_err(&ep->remote_dev->sdev->dev,
 827                        "%s %d err %d\n", __func__, __LINE__, map_err);
 828                msg.uop = SCIF_FREE_VIRT;
 829                msg.src = ep->port;
 830                msg.payload[0] = ep->remote_ep;
 831                msg.payload[1] = window->alloc_handle.vaddr;
 832                msg.payload[2] = (u64)window;
 833                msg.payload[3] = SCIF_REGISTER;
 834                spin_lock(&ep->lock);
 835                if (ep->state == SCIFEP_CONNECTED)
 836                        err = _scif_nodeqp_send(ep->remote_dev, &msg);
 837                else
 838                        err = -ENOTCONN;
 839                spin_unlock(&ep->lock);
 840                return err;
 841        }
 842
 843        remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
 844                                     ep->remote_dev);
 845
 846        /* Compute the number of lookup entries. 21 == 2MB Shift */
 847        nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
 848                          >> ilog2(SCIF_NR_ADDR_IN_PAGE);
 849
 850        dma_phys_lookup =
 851                scif_ioremap(remote_window->dma_addr_lookup.offset,
 852                             nr_lookup *
 853                             sizeof(*remote_window->dma_addr_lookup.lookup),
 854                             ep->remote_dev);
 855        num_pages_lookup =
 856                scif_ioremap(remote_window->num_pages_lookup.offset,
 857                             nr_lookup *
 858                             sizeof(*remote_window->num_pages_lookup.lookup),
 859                             ep->remote_dev);
 860
 861        while (remaining_nr_contig_chunks) {
 862                loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
 863                                              (int)SCIF_NR_ADDR_IN_PAGE);
 864                /* #1/2 - Copy  physical addresses over to the remote side */
 865
 866                /* #2/2 - Copy DMA addresses (addresses that are fed into the
 867                 * DMA engine) We transfer bus addresses which are then
 868                 * converted into a MIC physical address on the remote
 869                 * side if it is a MIC, if the remote node is a mgmt node we
 870                 * transfer the MIC physical address
 871                 */
 872                tmp = scif_ioremap(dma_phys_lookup[j],
 873                                   loop_nr_contig_chunks *
 874                                   sizeof(*window->dma_addr),
 875                                   ep->remote_dev);
 876                tmp1 = scif_ioremap(num_pages_lookup[j],
 877                                    loop_nr_contig_chunks *
 878                                    sizeof(*window->num_pages),
 879                                    ep->remote_dev);
 880                if (scif_is_mgmt_node()) {
 881                        memcpy_toio((void __force __iomem *)tmp,
 882                                    &window->dma_addr[i], loop_nr_contig_chunks
 883                                    * sizeof(*window->dma_addr));
 884                        memcpy_toio((void __force __iomem *)tmp1,
 885                                    &window->num_pages[i], loop_nr_contig_chunks
 886                                    * sizeof(*window->num_pages));
 887                } else {
 888                        if (scifdev_is_p2p(ep->remote_dev)) {
 889                                /*
 890                                 * add remote node's base address for this node
 891                                 * to convert it into a MIC address
 892                                 */
 893                                int m;
 894                                dma_addr_t dma_addr;
 895
 896                                for (m = 0; m < loop_nr_contig_chunks; m++) {
 897                                        dma_addr = window->dma_addr[i + m] +
 898                                                ep->remote_dev->base_addr;
 899                                        writeq(dma_addr,
 900                                               (void __force __iomem *)&tmp[m]);
 901                                }
 902                                memcpy_toio((void __force __iomem *)tmp1,
 903                                            &window->num_pages[i],
 904                                            loop_nr_contig_chunks
 905                                            * sizeof(*window->num_pages));
 906                        } else {
 907                                /* Mgmt node or loopback - transfer DMA
 908                                 * addresses as is, this is the same as a
 909                                 * MIC physical address (we use the dma_addr
 910                                 * and not the phys_addr array since the
 911                                 * phys_addr is only setup if there is a mmap()
 912                                 * request from the mgmt node)
 913                                 */
 914                                memcpy_toio((void __force __iomem *)tmp,
 915                                            &window->dma_addr[i],
 916                                            loop_nr_contig_chunks *
 917                                            sizeof(*window->dma_addr));
 918                                memcpy_toio((void __force __iomem *)tmp1,
 919                                            &window->num_pages[i],
 920                                            loop_nr_contig_chunks *
 921                                            sizeof(*window->num_pages));
 922                        }
 923                }
 924                remaining_nr_contig_chunks -= loop_nr_contig_chunks;
 925                i += loop_nr_contig_chunks;
 926                j++;
 927                scif_iounmap(tmp, loop_nr_contig_chunks *
 928                             sizeof(*window->dma_addr), ep->remote_dev);
 929                scif_iounmap(tmp1, loop_nr_contig_chunks *
 930                             sizeof(*window->num_pages), ep->remote_dev);
 931        }
 932
 933        /* Prepare the remote window for the peer */
 934        remote_window->peer_window = (u64)window;
 935        remote_window->offset = window->offset;
 936        remote_window->prot = window->prot;
 937        remote_window->nr_contig_chunks = nr_contig_chunks;
 938        remote_window->ep = ep->remote_ep;
 939        scif_iounmap(num_pages_lookup,
 940                     nr_lookup *
 941                     sizeof(*remote_window->num_pages_lookup.lookup),
 942                     ep->remote_dev);
 943        scif_iounmap(dma_phys_lookup,
 944                     nr_lookup *
 945                     sizeof(*remote_window->dma_addr_lookup.lookup),
 946                     ep->remote_dev);
 947        scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
 948        window->peer_window = alloc->vaddr;
 949        return err;
 950}
 951
 952/**
 953 * scif_send_scif_register:
 954 * @ep: end point
 955 * @window: self registration window
 956 *
 957 * Send a SCIF_REGISTER message if EP is connected and wait for a
 958 * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
 959 * message so that the peer can free its remote window allocated earlier.
 960 */
 961static int scif_send_scif_register(struct scif_endpt *ep,
 962                                   struct scif_window *window)
 963{
 964        int err = 0;
 965        struct scifmsg msg;
 966
 967        msg.src = ep->port;
 968        msg.payload[0] = ep->remote_ep;
 969        msg.payload[1] = window->alloc_handle.vaddr;
 970        msg.payload[2] = (u64)window;
 971        spin_lock(&ep->lock);
 972        if (ep->state == SCIFEP_CONNECTED) {
 973                msg.uop = SCIF_REGISTER;
 974                window->reg_state = OP_IN_PROGRESS;
 975                err = _scif_nodeqp_send(ep->remote_dev, &msg);
 976                spin_unlock(&ep->lock);
 977                if (!err) {
 978retry:
 979                        /* Wait for a SCIF_REGISTER_(N)ACK message */
 980                        err = wait_event_timeout(window->regwq,
 981                                                 window->reg_state !=
 982                                                 OP_IN_PROGRESS,
 983                                                 SCIF_NODE_ALIVE_TIMEOUT);
 984                        if (!err && scifdev_alive(ep))
 985                                goto retry;
 986                        err = !err ? -ENODEV : 0;
 987                        if (window->reg_state == OP_FAILED)
 988                                err = -ENOTCONN;
 989                }
 990        } else {
 991                msg.uop = SCIF_FREE_VIRT;
 992                msg.payload[3] = SCIF_REGISTER;
 993                err = _scif_nodeqp_send(ep->remote_dev, &msg);
 994                spin_unlock(&ep->lock);
 995                if (!err)
 996                        err = -ENOTCONN;
 997        }
 998        return err;
 999}
1000
1001/**
1002 * scif_get_window_offset:
1003 * @ep: end point descriptor
1004 * @flags: flags
1005 * @offset: offset hint
1006 * @num_pages: number of pages
1007 * @out_offset: computed offset returned by reference.
1008 *
1009 * Compute/Claim a new offset for this EP.
1010 */
1011int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
1012                           int num_pages, s64 *out_offset)
1013{
1014        s64 page_index;
1015        struct iova *iova_ptr;
1016        int err = 0;
1017
1018        if (flags & SCIF_MAP_FIXED) {
1019                page_index = SCIF_IOVA_PFN(offset);
1020                iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
1021                                        page_index + num_pages - 1);
1022                if (!iova_ptr)
1023                        err = -EADDRINUSE;
1024        } else {
1025                iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
1026                                      SCIF_DMA_63BIT_PFN - 1, 0);
1027                if (!iova_ptr)
1028                        err = -ENOMEM;
1029        }
1030        if (!err)
1031                *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
1032        return err;
1033}
1034
1035/**
1036 * scif_free_window_offset:
1037 * @ep: end point descriptor
1038 * @window: registration window
1039 * @offset: Offset to be freed
1040 *
1041 * Free offset for this EP. The callee is supposed to grab
1042 * the RMA mutex before calling this API.
1043 */
1044void scif_free_window_offset(struct scif_endpt *ep,
1045                             struct scif_window *window, s64 offset)
1046{
1047        if ((window && !window->offset_freed) || !window) {
1048                free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
1049                if (window)
1050                        window->offset_freed = true;
1051        }
1052}
1053
1054/**
1055 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
1056 * @msg:        Interrupt message
1057 *
1058 * Remote side is requesting a memory allocation.
1059 */
1060void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
1061{
1062        int err;
1063        struct scif_window *window = NULL;
1064        int nr_pages = msg->payload[1];
1065
1066        window = scif_create_remote_window(scifdev, nr_pages);
1067        if (!window) {
1068                err = -ENOMEM;
1069                goto error;
1070        }
1071
1072        /* The peer's allocation request is granted */
1073        msg->uop = SCIF_ALLOC_GNT;
1074        msg->payload[0] = (u64)window;
1075        msg->payload[1] = window->mapped_offset;
1076        err = scif_nodeqp_send(scifdev, msg);
1077        if (err)
1078                scif_destroy_remote_window(window);
1079        return;
1080error:
1081        /* The peer's allocation request is rejected */
1082        dev_err(&scifdev->sdev->dev,
1083                "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
1084                __func__, __LINE__, err, window, nr_pages);
1085        msg->uop = SCIF_ALLOC_REJ;
1086        scif_nodeqp_send(scifdev, msg);
1087}
1088
1089/**
1090 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
1091 * @msg:        Interrupt message
1092 *
1093 * Remote side responded to a memory allocation.
1094 */
1095void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
1096{
1097        struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
1098        struct scif_window *window = container_of(handle, struct scif_window,
1099                                                  alloc_handle);
1100        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1101
1102        mutex_lock(&ep->rma_info.rma_lock);
1103        handle->vaddr = msg->payload[0];
1104        handle->phys_addr = msg->payload[1];
1105        if (msg->uop == SCIF_ALLOC_GNT)
1106                handle->state = OP_COMPLETED;
1107        else
1108                handle->state = OP_FAILED;
1109        wake_up(&handle->allocwq);
1110        mutex_unlock(&ep->rma_info.rma_lock);
1111}
1112
1113/**
1114 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
1115 * @msg:        Interrupt message
1116 *
1117 * Free up memory kmalloc'd earlier.
1118 */
1119void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
1120{
1121        struct scif_window *window = (struct scif_window *)msg->payload[1];
1122
1123        scif_destroy_remote_window(window);
1124}
1125
1126static void
1127scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
1128{
1129        int j;
1130        struct scif_hw_dev *sdev = dev->sdev;
1131        phys_addr_t apt_base = 0;
1132
1133        /*
1134         * Add the aperture base if the DMA address is not card relative
1135         * since the DMA addresses need to be an offset into the bar
1136         */
1137        if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
1138            sdev->aper && !sdev->card_rel_da)
1139                apt_base = sdev->aper->pa;
1140        else
1141                return;
1142
1143        for (j = 0; j < window->nr_contig_chunks; j++) {
1144                if (window->num_pages[j])
1145                        window->dma_addr[j] += apt_base;
1146                else
1147                        break;
1148        }
1149}
1150
1151/**
1152 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
1153 * @msg:        Interrupt message
1154 *
1155 * Update remote window list with a new registered window.
1156 */
1157void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
1158{
1159        struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
1160        struct scif_window *window =
1161                (struct scif_window *)msg->payload[1];
1162
1163        mutex_lock(&ep->rma_info.rma_lock);
1164        spin_lock(&ep->lock);
1165        if (ep->state == SCIFEP_CONNECTED) {
1166                msg->uop = SCIF_REGISTER_ACK;
1167                scif_nodeqp_send(ep->remote_dev, msg);
1168                scif_fixup_aper_base(ep->remote_dev, window);
1169                /* No further failures expected. Insert new window */
1170                scif_insert_window(window, &ep->rma_info.remote_reg_list);
1171        } else {
1172                msg->uop = SCIF_REGISTER_NACK;
1173                scif_nodeqp_send(ep->remote_dev, msg);
1174        }
1175        spin_unlock(&ep->lock);
1176        mutex_unlock(&ep->rma_info.rma_lock);
1177        /* free up any lookup resources now that page lists are transferred */
1178        scif_destroy_remote_lookup(ep->remote_dev, window);
1179        /*
1180         * We could not insert the window but we need to
1181         * destroy the window.
1182         */
1183        if (msg->uop == SCIF_REGISTER_NACK)
1184                scif_destroy_remote_window(window);
1185}
1186
1187/**
1188 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
1189 * @msg:        Interrupt message
1190 *
1191 * Remove window from remote registration list;
1192 */
1193void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
1194{
1195        struct scif_rma_req req;
1196        struct scif_window *window = NULL;
1197        struct scif_window *recv_window =
1198                (struct scif_window *)msg->payload[0];
1199        struct scif_endpt *ep;
1200        int del_window = 0;
1201
1202        ep = (struct scif_endpt *)recv_window->ep;
1203        req.out_window = &window;
1204        req.offset = recv_window->offset;
1205        req.prot = 0;
1206        req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
1207        req.type = SCIF_WINDOW_FULL;
1208        req.head = &ep->rma_info.remote_reg_list;
1209        msg->payload[0] = ep->remote_ep;
1210
1211        mutex_lock(&ep->rma_info.rma_lock);
1212        /* Does a valid window exist? */
1213        if (scif_query_window(&req)) {
1214                dev_err(&scifdev->sdev->dev,
1215                        "%s %d -ENXIO\n", __func__, __LINE__);
1216                msg->uop = SCIF_UNREGISTER_ACK;
1217                goto error;
1218        }
1219        if (window) {
1220                if (window->ref_count)
1221                        scif_put_window(window, window->nr_pages);
1222                else
1223                        dev_err(&scifdev->sdev->dev,
1224                                "%s %d ref count should be +ve\n",
1225                                __func__, __LINE__);
1226                window->unreg_state = OP_COMPLETED;
1227                if (!window->ref_count) {
1228                        msg->uop = SCIF_UNREGISTER_ACK;
1229                        atomic_inc(&ep->rma_info.tw_refcount);
1230                        ep->rma_info.async_list_del = 1;
1231                        list_del_init(&window->list);
1232                        del_window = 1;
1233                } else {
1234                        /* NACK! There are valid references to this window */
1235                        msg->uop = SCIF_UNREGISTER_NACK;
1236                }
1237        } else {
1238                /* The window did not make its way to the list at all. ACK */
1239                msg->uop = SCIF_UNREGISTER_ACK;
1240                scif_destroy_remote_window(recv_window);
1241        }
1242error:
1243        mutex_unlock(&ep->rma_info.rma_lock);
1244        if (del_window)
1245                scif_drain_dma_intr(ep->remote_dev->sdev,
1246                                    ep->rma_info.dma_chan);
1247        scif_nodeqp_send(ep->remote_dev, msg);
1248        if (del_window)
1249                scif_queue_for_cleanup(window, &scif_info.rma);
1250}
1251
1252/**
1253 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
1254 * @msg:        Interrupt message
1255 *
1256 * Wake up the window waiting to complete registration.
1257 */
1258void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
1259{
1260        struct scif_window *window =
1261                (struct scif_window *)msg->payload[2];
1262        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1263
1264        mutex_lock(&ep->rma_info.rma_lock);
1265        window->reg_state = OP_COMPLETED;
1266        wake_up(&window->regwq);
1267        mutex_unlock(&ep->rma_info.rma_lock);
1268}
1269
1270/**
1271 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
1272 * @msg:        Interrupt message
1273 *
1274 * Wake up the window waiting to inform it that registration
1275 * cannot be completed.
1276 */
1277void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
1278{
1279        struct scif_window *window =
1280                (struct scif_window *)msg->payload[2];
1281        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1282
1283        mutex_lock(&ep->rma_info.rma_lock);
1284        window->reg_state = OP_FAILED;
1285        wake_up(&window->regwq);
1286        mutex_unlock(&ep->rma_info.rma_lock);
1287}
1288
1289/**
1290 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
1291 * @msg:        Interrupt message
1292 *
1293 * Wake up the window waiting to complete unregistration.
1294 */
1295void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
1296{
1297        struct scif_window *window =
1298                (struct scif_window *)msg->payload[1];
1299        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1300
1301        mutex_lock(&ep->rma_info.rma_lock);
1302        window->unreg_state = OP_COMPLETED;
1303        wake_up(&window->unregwq);
1304        mutex_unlock(&ep->rma_info.rma_lock);
1305}
1306
1307/**
1308 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
1309 * @msg:        Interrupt message
1310 *
1311 * Wake up the window waiting to inform it that unregistration
1312 * cannot be completed immediately.
1313 */
1314void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
1315{
1316        struct scif_window *window =
1317                (struct scif_window *)msg->payload[1];
1318        struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1319
1320        mutex_lock(&ep->rma_info.rma_lock);
1321        window->unreg_state = OP_FAILED;
1322        wake_up(&window->unregwq);
1323        mutex_unlock(&ep->rma_info.rma_lock);
1324}
1325
1326int __scif_pin_pages(void *addr, size_t len, int *out_prot,
1327                     int map_flags, scif_pinned_pages_t *pages)
1328{
1329        struct scif_pinned_pages *pinned_pages;
1330        int nr_pages, err = 0, i;
1331        bool vmalloc_addr = false;
1332        bool try_upgrade = false;
1333        int prot = *out_prot;
1334        int ulimit = 0;
1335        struct mm_struct *mm = NULL;
1336
1337        /* Unsupported flags */
1338        if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
1339                return -EINVAL;
1340        ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
1341
1342        /* Unsupported protection requested */
1343        if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1344                return -EINVAL;
1345
1346        /* addr/len must be page aligned. len should be non zero */
1347        if (!len ||
1348            (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1349            (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1350                return -EINVAL;
1351
1352        might_sleep();
1353
1354        nr_pages = len >> PAGE_SHIFT;
1355
1356        /* Allocate a set of pinned pages */
1357        pinned_pages = scif_create_pinned_pages(nr_pages, prot);
1358        if (!pinned_pages)
1359                return -ENOMEM;
1360
1361        if (map_flags & SCIF_MAP_KERNEL) {
1362                if (is_vmalloc_addr(addr))
1363                        vmalloc_addr = true;
1364
1365                for (i = 0; i < nr_pages; i++) {
1366                        if (vmalloc_addr)
1367                                pinned_pages->pages[i] =
1368                                        vmalloc_to_page(addr + (i * PAGE_SIZE));
1369                        else
1370                                pinned_pages->pages[i] =
1371                                        virt_to_page(addr + (i * PAGE_SIZE));
1372                }
1373                pinned_pages->nr_pages = nr_pages;
1374                pinned_pages->map_flags = SCIF_MAP_KERNEL;
1375        } else {
1376                /*
1377                 * SCIF supports registration caching. If a registration has
1378                 * been requested with read only permissions, then we try
1379                 * to pin the pages with RW permissions so that a subsequent
1380                 * transfer with RW permission can hit the cache instead of
1381                 * invalidating it. If the upgrade fails with RW then we
1382                 * revert back to R permission and retry
1383                 */
1384                if (prot == SCIF_PROT_READ)
1385                        try_upgrade = true;
1386                prot |= SCIF_PROT_WRITE;
1387retry:
1388                mm = current->mm;
1389                down_write(&mm->mmap_sem);
1390                if (ulimit) {
1391                        err = __scif_check_inc_pinned_vm(mm, nr_pages);
1392                        if (err) {
1393                                up_write(&mm->mmap_sem);
1394                                pinned_pages->nr_pages = 0;
1395                                goto error_unmap;
1396                        }
1397                }
1398
1399                pinned_pages->nr_pages = get_user_pages(
1400                                (u64)addr,
1401                                nr_pages,
1402                                (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
1403                                pinned_pages->pages,
1404                                NULL);
1405                up_write(&mm->mmap_sem);
1406                if (nr_pages != pinned_pages->nr_pages) {
1407                        if (try_upgrade) {
1408                                if (ulimit)
1409                                        __scif_dec_pinned_vm_lock(mm,
1410                                                                  nr_pages, 0);
1411                                /* Roll back any pinned pages */
1412                                for (i = 0; i < pinned_pages->nr_pages; i++) {
1413                                        if (pinned_pages->pages[i])
1414                                                put_page(
1415                                                pinned_pages->pages[i]);
1416                                }
1417                                prot &= ~SCIF_PROT_WRITE;
1418                                try_upgrade = false;
1419                                goto retry;
1420                        }
1421                }
1422                pinned_pages->map_flags = 0;
1423        }
1424
1425        if (pinned_pages->nr_pages < nr_pages) {
1426                err = -EFAULT;
1427                pinned_pages->nr_pages = nr_pages;
1428                goto dec_pinned;
1429        }
1430
1431        *out_prot = prot;
1432        atomic_set(&pinned_pages->ref_count, 1);
1433        *pages = pinned_pages;
1434        return err;
1435dec_pinned:
1436        if (ulimit)
1437                __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
1438        /* Something went wrong! Rollback */
1439error_unmap:
1440        pinned_pages->nr_pages = nr_pages;
1441        scif_destroy_pinned_pages(pinned_pages);
1442        *pages = NULL;
1443        dev_dbg(scif_info.mdev.this_device,
1444                "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
1445        return err;
1446}
1447
1448int scif_pin_pages(void *addr, size_t len, int prot,
1449                   int map_flags, scif_pinned_pages_t *pages)
1450{
1451        return __scif_pin_pages(addr, len, &prot, map_flags, pages);
1452}
1453EXPORT_SYMBOL_GPL(scif_pin_pages);
1454
1455int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
1456{
1457        int err = 0, ret;
1458
1459        if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
1460                return -EINVAL;
1461
1462        ret = atomic_sub_return(1, &pinned_pages->ref_count);
1463        if (ret < 0) {
1464                dev_err(scif_info.mdev.this_device,
1465                        "%s %d scif_unpin_pages called without pinning? rc %d\n",
1466                        __func__, __LINE__, ret);
1467                return -EINVAL;
1468        }
1469        /*
1470         * Destroy the window if the ref count for this set of pinned
1471         * pages has dropped to zero. If it is positive then there is
1472         * a valid registered window which is backed by these pages and
1473         * it will be destroyed once all such windows are unregistered.
1474         */
1475        if (!ret)
1476                err = scif_destroy_pinned_pages(pinned_pages);
1477
1478        return err;
1479}
1480EXPORT_SYMBOL_GPL(scif_unpin_pages);
1481
1482static inline void
1483scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
1484{
1485        mutex_lock(&ep->rma_info.rma_lock);
1486        scif_insert_window(window, &ep->rma_info.reg_list);
1487        mutex_unlock(&ep->rma_info.rma_lock);
1488}
1489
1490off_t scif_register_pinned_pages(scif_epd_t epd,
1491                                 scif_pinned_pages_t pinned_pages,
1492                                 off_t offset, int map_flags)
1493{
1494        struct scif_endpt *ep = (struct scif_endpt *)epd;
1495        s64 computed_offset;
1496        struct scif_window *window;
1497        int err;
1498        size_t len;
1499        struct device *spdev;
1500
1501        /* Unsupported flags */
1502        if (map_flags & ~SCIF_MAP_FIXED)
1503                return -EINVAL;
1504
1505        len = pinned_pages->nr_pages << PAGE_SHIFT;
1506
1507        /*
1508         * Offset is not page aligned/negative or offset+len
1509         * wraps around with SCIF_MAP_FIXED.
1510         */
1511        if ((map_flags & SCIF_MAP_FIXED) &&
1512            ((ALIGN(offset, PAGE_SIZE) != offset) ||
1513            (offset < 0) ||
1514            (len > LONG_MAX - offset)))
1515                return -EINVAL;
1516
1517        might_sleep();
1518
1519        err = scif_verify_epd(ep);
1520        if (err)
1521                return err;
1522        /*
1523         * It is an error to pass pinned_pages to scif_register_pinned_pages()
1524         * after calling scif_unpin_pages().
1525         */
1526        if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
1527                return -EINVAL;
1528
1529        /* Compute the offset for this registration */
1530        err = scif_get_window_offset(ep, map_flags, offset,
1531                                     len, &computed_offset);
1532        if (err) {
1533                atomic_sub(1, &pinned_pages->ref_count);
1534                return err;
1535        }
1536
1537        /* Allocate and prepare self registration window */
1538        window = scif_create_window(ep, pinned_pages->nr_pages,
1539                                    computed_offset, false);
1540        if (!window) {
1541                atomic_sub(1, &pinned_pages->ref_count);
1542                scif_free_window_offset(ep, NULL, computed_offset);
1543                return -ENOMEM;
1544        }
1545
1546        window->pinned_pages = pinned_pages;
1547        window->nr_pages = pinned_pages->nr_pages;
1548        window->prot = pinned_pages->prot;
1549
1550        spdev = scif_get_peer_dev(ep->remote_dev);
1551        if (IS_ERR(spdev)) {
1552                err = PTR_ERR(spdev);
1553                scif_destroy_window(ep, window);
1554                return err;
1555        }
1556        err = scif_send_alloc_request(ep, window);
1557        if (err) {
1558                dev_err(&ep->remote_dev->sdev->dev,
1559                        "%s %d err %d\n", __func__, __LINE__, err);
1560                goto error_unmap;
1561        }
1562
1563        /* Prepare the remote registration window */
1564        err = scif_prep_remote_window(ep, window);
1565        if (err) {
1566                dev_err(&ep->remote_dev->sdev->dev,
1567                        "%s %d err %d\n", __func__, __LINE__, err);
1568                goto error_unmap;
1569        }
1570
1571        /* Tell the peer about the new window */
1572        err = scif_send_scif_register(ep, window);
1573        if (err) {
1574                dev_err(&ep->remote_dev->sdev->dev,
1575                        "%s %d err %d\n", __func__, __LINE__, err);
1576                goto error_unmap;
1577        }
1578
1579        scif_put_peer_dev(spdev);
1580        /* No further failures expected. Insert new window */
1581        scif_insert_local_window(window, ep);
1582        return computed_offset;
1583error_unmap:
1584        scif_destroy_window(ep, window);
1585        scif_put_peer_dev(spdev);
1586        dev_err(&ep->remote_dev->sdev->dev,
1587                "%s %d err %d\n", __func__, __LINE__, err);
1588        return err;
1589}
1590EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
1591
1592off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
1593                    int prot, int map_flags)
1594{
1595        scif_pinned_pages_t pinned_pages;
1596        off_t err;
1597        struct scif_endpt *ep = (struct scif_endpt *)epd;
1598        s64 computed_offset;
1599        struct scif_window *window;
1600        struct mm_struct *mm = NULL;
1601        struct device *spdev;
1602
1603        dev_dbg(scif_info.mdev.this_device,
1604                "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
1605                epd, addr, len, offset, prot, map_flags);
1606        /* Unsupported flags */
1607        if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
1608                return -EINVAL;
1609
1610        /*
1611         * Offset is not page aligned/negative or offset+len
1612         * wraps around with SCIF_MAP_FIXED.
1613         */
1614        if ((map_flags & SCIF_MAP_FIXED) &&
1615            ((ALIGN(offset, PAGE_SIZE) != offset) ||
1616            (offset < 0) ||
1617            (len > LONG_MAX - offset)))
1618                return -EINVAL;
1619
1620        /* Unsupported protection requested */
1621        if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1622                return -EINVAL;
1623
1624        /* addr/len must be page aligned. len should be non zero */
1625        if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1626            (ALIGN(len, PAGE_SIZE) != len))
1627                return -EINVAL;
1628
1629        might_sleep();
1630
1631        err = scif_verify_epd(ep);
1632        if (err)
1633                return err;
1634
1635        /* Compute the offset for this registration */
1636        err = scif_get_window_offset(ep, map_flags, offset,
1637                                     len >> PAGE_SHIFT, &computed_offset);
1638        if (err)
1639                return err;
1640
1641        spdev = scif_get_peer_dev(ep->remote_dev);
1642        if (IS_ERR(spdev)) {
1643                err = PTR_ERR(spdev);
1644                scif_free_window_offset(ep, NULL, computed_offset);
1645                return err;
1646        }
1647        /* Allocate and prepare self registration window */
1648        window = scif_create_window(ep, len >> PAGE_SHIFT,
1649                                    computed_offset, false);
1650        if (!window) {
1651                scif_free_window_offset(ep, NULL, computed_offset);
1652                scif_put_peer_dev(spdev);
1653                return -ENOMEM;
1654        }
1655
1656        window->nr_pages = len >> PAGE_SHIFT;
1657
1658        err = scif_send_alloc_request(ep, window);
1659        if (err) {
1660                scif_destroy_incomplete_window(ep, window);
1661                scif_put_peer_dev(spdev);
1662                return err;
1663        }
1664
1665        if (!(map_flags & SCIF_MAP_KERNEL)) {
1666                mm = __scif_acquire_mm();
1667                map_flags |= SCIF_MAP_ULIMIT;
1668        }
1669        /* Pin down the pages */
1670        err = __scif_pin_pages(addr, len, &prot,
1671                               map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
1672                               &pinned_pages);
1673        if (err) {
1674                scif_destroy_incomplete_window(ep, window);
1675                __scif_release_mm(mm);
1676                goto error;
1677        }
1678
1679        window->pinned_pages = pinned_pages;
1680        window->prot = pinned_pages->prot;
1681        window->mm = mm;
1682
1683        /* Prepare the remote registration window */
1684        err = scif_prep_remote_window(ep, window);
1685        if (err) {
1686                dev_err(&ep->remote_dev->sdev->dev,
1687                        "%s %d err %ld\n", __func__, __LINE__, err);
1688                goto error_unmap;
1689        }
1690
1691        /* Tell the peer about the new window */
1692        err = scif_send_scif_register(ep, window);
1693        if (err) {
1694                dev_err(&ep->remote_dev->sdev->dev,
1695                        "%s %d err %ld\n", __func__, __LINE__, err);
1696                goto error_unmap;
1697        }
1698
1699        scif_put_peer_dev(spdev);
1700        /* No further failures expected. Insert new window */
1701        scif_insert_local_window(window, ep);
1702        dev_dbg(&ep->remote_dev->sdev->dev,
1703                "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
1704                epd, addr, len, computed_offset);
1705        return computed_offset;
1706error_unmap:
1707        scif_destroy_window(ep, window);
1708error:
1709        scif_put_peer_dev(spdev);
1710        dev_err(&ep->remote_dev->sdev->dev,
1711                "%s %d err %ld\n", __func__, __LINE__, err);
1712        return err;
1713}
1714EXPORT_SYMBOL_GPL(scif_register);
1715
1716int
1717scif_unregister(scif_epd_t epd, off_t offset, size_t len)
1718{
1719        struct scif_endpt *ep = (struct scif_endpt *)epd;
1720        struct scif_window *window = NULL;
1721        struct scif_rma_req req;
1722        int nr_pages, err;
1723        struct device *spdev;
1724
1725        dev_dbg(scif_info.mdev.this_device,
1726                "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
1727                ep, offset, len);
1728        /* len must be page aligned. len should be non zero */
1729        if (!len ||
1730            (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1731                return -EINVAL;
1732
1733        /* Offset is not page aligned or offset+len wraps around */
1734        if ((ALIGN(offset, PAGE_SIZE) != offset) ||
1735            (offset < 0) ||
1736            (len > LONG_MAX - offset))
1737                return -EINVAL;
1738
1739        err = scif_verify_epd(ep);
1740        if (err)
1741                return err;
1742
1743        might_sleep();
1744        nr_pages = len >> PAGE_SHIFT;
1745
1746        req.out_window = &window;
1747        req.offset = offset;
1748        req.prot = 0;
1749        req.nr_bytes = len;
1750        req.type = SCIF_WINDOW_FULL;
1751        req.head = &ep->rma_info.reg_list;
1752
1753        spdev = scif_get_peer_dev(ep->remote_dev);
1754        if (IS_ERR(spdev)) {
1755                err = PTR_ERR(spdev);
1756                return err;
1757        }
1758        mutex_lock(&ep->rma_info.rma_lock);
1759        /* Does a valid window exist? */
1760        err = scif_query_window(&req);
1761        if (err) {
1762                dev_err(&ep->remote_dev->sdev->dev,
1763                        "%s %d err %d\n", __func__, __LINE__, err);
1764                goto error;
1765        }
1766        /* Unregister all the windows in this range */
1767        err = scif_rma_list_unregister(window, offset, nr_pages);
1768        if (err)
1769                dev_err(&ep->remote_dev->sdev->dev,
1770                        "%s %d err %d\n", __func__, __LINE__, err);
1771error:
1772        mutex_unlock(&ep->rma_info.rma_lock);
1773        scif_put_peer_dev(spdev);
1774        return err;
1775}
1776EXPORT_SYMBOL_GPL(scif_unregister);
1777