linux/drivers/misc/vmw_vmci/vmci_context.c
<<
>>
Prefs
   1/*
   2 * VMware VMCI Driver
   3 *
   4 * Copyright (C) 2012 VMware, Inc. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation version 2 and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 * for more details.
  14 */
  15
  16#include <linux/vmw_vmci_defs.h>
  17#include <linux/vmw_vmci_api.h>
  18#include <linux/highmem.h>
  19#include <linux/kernel.h>
  20#include <linux/module.h>
  21#include <linux/sched.h>
  22#include <linux/cred.h>
  23#include <linux/slab.h>
  24
  25#include "vmci_queue_pair.h"
  26#include "vmci_datagram.h"
  27#include "vmci_doorbell.h"
  28#include "vmci_context.h"
  29#include "vmci_driver.h"
  30#include "vmci_event.h"
  31
/*
 * List of current VMCI contexts.  Contexts can be added by
 * vmci_ctx_create() and removed via vmci_ctx_destroy().
 * These, along with context lookup, are protected by the
 * list structure's lock.
 *
 * Readers traverse the list under rcu_read_lock() (see
 * vmci_ctx_get() / vmci_ctx_exists() / ctx_fire_notification());
 * writers hold the spinlock and use the _rcu list primitives.
 */
static struct {
	struct list_head head; /* Entries linked via vmci_ctx.list_item */
	spinlock_t lock; /* Spinlock for context list operations */
} ctx_list = {
	.head = LIST_HEAD_INIT(ctx_list.head),
	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
};
  45
/* Used by contexts that did not set up notify flag pointers */
static bool ctx_dummy_notify;

/*
 * Raise the guest-visible notify flag for @context.  The flag pointer
 * either refers into a page set up by vmci_setup_notify() or to
 * ctx_dummy_notify when no such page was registered.
 */
static void ctx_signal_notify(struct vmci_ctx *context)
{
	*context->notify = true;
}

/* Clear the guest-visible notify flag for @context. */
static void ctx_clear_notify(struct vmci_ctx *context)
{
	*context->notify = false;
}
  58
  59/*
  60 * If nothing requires the attention of the guest, clears both
  61 * notify flag and call.
  62 */
  63static void ctx_clear_notify_call(struct vmci_ctx *context)
  64{
  65        if (context->pending_datagrams == 0 &&
  66            vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
  67                ctx_clear_notify(context);
  68}
  69
/*
 * Sets the context's notify flag iff datagrams are pending for this
 * context.  Called from vmci_setup_notify().
 */
void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
{
	/* context->lock serializes against datagram enqueue/dequeue. */
	spin_lock(&context->lock);
	if (context->pending_datagrams)
		ctx_signal_notify(context);
	spin_unlock(&context->lock);
}
  81
  82/*
  83 * Allocates and initializes a VMCI context.
  84 */
  85struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
  86                                 uintptr_t event_hnd,
  87                                 int user_version,
  88                                 const struct cred *cred)
  89{
  90        struct vmci_ctx *context;
  91        int error;
  92
  93        if (cid == VMCI_INVALID_ID) {
  94                pr_devel("Invalid context ID for VMCI context\n");
  95                error = -EINVAL;
  96                goto err_out;
  97        }
  98
  99        if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
 100                pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
 101                         priv_flags);
 102                error = -EINVAL;
 103                goto err_out;
 104        }
 105
 106        if (user_version == 0) {
 107                pr_devel("Invalid suer_version %d\n", user_version);
 108                error = -EINVAL;
 109                goto err_out;
 110        }
 111
 112        context = kzalloc(sizeof(*context), GFP_KERNEL);
 113        if (!context) {
 114                pr_warn("Failed to allocate memory for VMCI context\n");
 115                error = -EINVAL;
 116                goto err_out;
 117        }
 118
 119        kref_init(&context->kref);
 120        spin_lock_init(&context->lock);
 121        INIT_LIST_HEAD(&context->list_item);
 122        INIT_LIST_HEAD(&context->datagram_queue);
 123        INIT_LIST_HEAD(&context->notifier_list);
 124
 125        /* Initialize host-specific VMCI context. */
 126        init_waitqueue_head(&context->host_context.wait_queue);
 127
 128        context->queue_pair_array = vmci_handle_arr_create(0);
 129        if (!context->queue_pair_array) {
 130                error = -ENOMEM;
 131                goto err_free_ctx;
 132        }
 133
 134        context->doorbell_array = vmci_handle_arr_create(0);
 135        if (!context->doorbell_array) {
 136                error = -ENOMEM;
 137                goto err_free_qp_array;
 138        }
 139
 140        context->pending_doorbell_array = vmci_handle_arr_create(0);
 141        if (!context->pending_doorbell_array) {
 142                error = -ENOMEM;
 143                goto err_free_db_array;
 144        }
 145
 146        context->user_version = user_version;
 147
 148        context->priv_flags = priv_flags;
 149
 150        if (cred)
 151                context->cred = get_cred(cred);
 152
 153        context->notify = &ctx_dummy_notify;
 154        context->notify_page = NULL;
 155
 156        /*
 157         * If we collide with an existing context we generate a new
 158         * and use it instead. The VMX will determine if regeneration
 159         * is okay. Since there isn't 4B - 16 VMs running on a given
 160         * host, the below loop will terminate.
 161         */
 162        spin_lock(&ctx_list.lock);
 163
 164        while (vmci_ctx_exists(cid)) {
 165                /* We reserve the lowest 16 ids for fixed contexts. */
 166                cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
 167                if (cid == VMCI_INVALID_ID)
 168                        cid = VMCI_RESERVED_CID_LIMIT;
 169        }
 170        context->cid = cid;
 171
 172        list_add_tail_rcu(&context->list_item, &ctx_list.head);
 173        spin_unlock(&ctx_list.lock);
 174
 175        return context;
 176
 177 err_free_db_array:
 178        vmci_handle_arr_destroy(context->doorbell_array);
 179 err_free_qp_array:
 180        vmci_handle_arr_destroy(context->queue_pair_array);
 181 err_free_ctx:
 182        kfree(context);
 183 err_out:
 184        return ERR_PTR(error);
 185}
 186
/*
 * Destroy VMCI context.
 *
 * Unlinks the context from the global list, then waits out an RCU
 * grace period before dropping the creator's reference.  After
 * synchronize_rcu() no reader can still find the context on the
 * list, so the final vmci_ctx_put() cannot race with vmci_ctx_get()
 * (see the comment inside vmci_ctx_get()).
 */
void vmci_ctx_destroy(struct vmci_ctx *context)
{
	spin_lock(&ctx_list.lock);
	list_del_rcu(&context->list_item);
	spin_unlock(&ctx_list.lock);
	synchronize_rcu();

	vmci_ctx_put(context);
}
 199
/*
 * Fire notification for all contexts interested in given cid.
 *
 * Sends a VMCI_EVENT_CTX_REMOVED event datagram for @context_id to
 * every context whose notifier_list contains a matching handle,
 * skipping subscribers that @priv_flags may not interact with.
 * Returns VMCI_SUCCESS unless the scratch array cannot be allocated;
 * individual dispatch failures are logged and skipped.
 */
static int ctx_fire_notification(u32 context_id, u32 priv_flags)
{
	u32 i, array_size;
	struct vmci_ctx *sub_ctx;
	struct vmci_handle_arr *subscriber_array;
	struct vmci_handle context_handle =
		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);

	/*
	 * We create an array to hold the subscribers we find when
	 * scanning through all contexts.
	 */
	subscriber_array = vmci_handle_arr_create(0);
	if (subscriber_array == NULL)
		return VMCI_ERROR_NO_MEM;

	/*
	 * Scan all contexts to find who is interested in being
	 * notified about given contextID.  Dispatch happens after the
	 * RCU section so we do not send datagrams under rcu_read_lock.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
		struct vmci_handle_list *node;

		/*
		 * We only deliver notifications of the removal of
		 * contexts, if the two contexts are allowed to
		 * interact.
		 */
		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
			continue;

		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
			if (!vmci_handle_is_equal(node->handle, context_handle))
				continue;

			vmci_handle_arr_append_entry(&subscriber_array,
					vmci_make_handle(sub_ctx->cid,
							 VMCI_EVENT_HANDLER));
		}
	}
	rcu_read_unlock();

	/* Fire event to all subscribers. */
	array_size = vmci_handle_arr_get_size(subscriber_array);
	for (i = 0; i < array_size; i++) {
		int result;
		struct vmci_event_ctx ev;

		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
						  VMCI_CONTEXT_RESOURCE_ID);
		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
		ev.payload.context_id = context_id;

		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
						&ev.msg.hdr, false);
		if (result < VMCI_SUCCESS) {
			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
				 ev.msg.event_data.event,
				 ev.msg.hdr.dst.context);
			/* We continue to enqueue on next subscriber. */
		}
	}
	vmci_handle_arr_destroy(subscriber_array);

	return VMCI_SUCCESS;
}
 272
 273/*
 274 * Returns the current number of pending datagrams. The call may
 275 * also serve as a synchronization point for the datagram queue,
 276 * as no enqueue operations can occur concurrently.
 277 */
 278int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
 279{
 280        struct vmci_ctx *context;
 281
 282        context = vmci_ctx_get(cid);
 283        if (context == NULL)
 284                return VMCI_ERROR_INVALID_ARGS;
 285
 286        spin_lock(&context->lock);
 287        if (pending)
 288                *pending = context->pending_datagrams;
 289        spin_unlock(&context->lock);
 290        vmci_ctx_put(context);
 291
 292        return VMCI_SUCCESS;
 293}
 294
/*
 * Queues a VMCI datagram for the appropriate target VM context.
 *
 * On success the context takes ownership of @dg (it is freed by the
 * dequeue path or by context teardown) and the datagram's size in
 * bytes is returned.  On failure @dg is left untouched and a negative
 * VMCI_ERROR_* code is returned.
 */
int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
{
	struct vmci_datagram_queue_entry *dq_entry;
	struct vmci_ctx *context;
	struct vmci_handle dg_src;
	size_t vmci_dg_size;

	vmci_dg_size = VMCI_DG_SIZE(dg);
	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
		return VMCI_ERROR_INVALID_ARGS;
	}

	/* Get the target VM's VMCI context. */
	context = vmci_ctx_get(cid);
	if (!context) {
		pr_devel("Invalid context (ID=0x%x)\n", cid);
		return VMCI_ERROR_INVALID_ARGS;
	}

	/* Allocate guest call entry and add it to the target VM's queue. */
	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
	if (dq_entry == NULL) {
		pr_warn("Failed to allocate memory for datagram\n");
		vmci_ctx_put(context);
		return VMCI_ERROR_NO_MEM;
	}
	dq_entry->dg = dg;
	dq_entry->dg_size = vmci_dg_size;
	/* Source handle snapshotted before the entry is published. */
	dg_src = dg->src;
	INIT_LIST_HEAD(&dq_entry->list_item);

	spin_lock(&context->lock);

	/*
	 * We put a higher limit on datagrams from the hypervisor.  If
	 * the pending datagram is not from hypervisor, then we check
	 * if enqueueing it would exceed the
	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
	 * the pending datagram is from hypervisor, we allow it to be
	 * queued at the destination side provided we don't reach the
	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
	 */
	if (context->datagram_queue_size + vmci_dg_size >=
	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
	    (!vmci_handle_is_equal(dg_src,
				vmci_make_handle
				(VMCI_HYPERVISOR_CONTEXT_ID,
				 VMCI_CONTEXT_RESOURCE_ID)) ||
	     context->datagram_queue_size + vmci_dg_size >=
	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
		spin_unlock(&context->lock);
		vmci_ctx_put(context);
		kfree(dq_entry);
		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
		return VMCI_ERROR_NO_RESOURCES;
	}

	/* Publish the entry, raise the notify flag and wake any reader. */
	list_add(&dq_entry->list_item, &context->datagram_queue);
	context->pending_datagrams++;
	context->datagram_queue_size += vmci_dg_size;
	ctx_signal_notify(context);
	wake_up(&context->host_context.wait_queue);
	spin_unlock(&context->lock);
	vmci_ctx_put(context);

	return vmci_dg_size;
}
 366
 367/*
 368 * Verifies whether a context with the specified context ID exists.
 369 * FIXME: utility is dubious as no decisions can be reliably made
 370 * using this data as context can appear and disappear at any time.
 371 */
 372bool vmci_ctx_exists(u32 cid)
 373{
 374        struct vmci_ctx *context;
 375        bool exists = false;
 376
 377        rcu_read_lock();
 378
 379        list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
 380                if (context->cid == cid) {
 381                        exists = true;
 382                        break;
 383                }
 384        }
 385
 386        rcu_read_unlock();
 387        return exists;
 388}
 389
/*
 * Retrieves VMCI context corresponding to the given cid.
 *
 * Returns the context with an additional kref held (release with
 * vmci_ctx_put()), or NULL if @cid is invalid or no such context is
 * on the list.
 */
struct vmci_ctx *vmci_ctx_get(u32 cid)
{
	struct vmci_ctx *c, *context = NULL;

	if (cid == VMCI_INVALID_ID)
		return NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
		if (c->cid == cid) {
			/*
			 * The context owner drops its own reference to the
			 * context only after removing it from the list and
			 * waiting for RCU grace period to expire. This
			 * means that we are not about to increase the
			 * reference count of something that is in the
			 * process of being destroyed.
			 */
			context = c;
			kref_get(&context->kref);
			break;
		}
	}
	rcu_read_unlock();

	return context;
}
 420
/*
 * Deallocates all parts of a context data structure. This
 * function doesn't lock the context, because it assumes that
 * the caller was holding the last reference to context.
 *
 * Runs as the kref release callback from vmci_ctx_put(); by then the
 * context is off ctx_list and unreachable, so queues and lists can be
 * torn down without taking context->lock.
 */
static void ctx_free_ctx(struct kref *kref)
{
	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
	struct vmci_handle temp_handle;
	struct vmci_handle_list *notifier, *tmp;

	/*
	 * Fire event to all contexts interested in knowing this
	 * context is dying.
	 */
	ctx_fire_notification(context->cid, context->priv_flags);

	/*
	 * Cleanup all queue pair resources attached to context.  If
	 * the VM dies without cleaning up, this code will make sure
	 * that no resources are leaked.
	 */
	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
		if (vmci_qp_broker_detach(temp_handle,
					  context) < VMCI_SUCCESS) {
			/*
			 * When vmci_qp_broker_detach() succeeds it
			 * removes the handle from the array.  If
			 * detach fails, we must remove the handle
			 * ourselves.
			 */
			vmci_handle_arr_remove_entry(context->queue_pair_array,
						     temp_handle);
		}
		temp_handle =
		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
	}

	/*
	 * It is fine to destroy this without locking the callQueue, as
	 * this is the only thread having a reference to the context.
	 */
	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
				 &context->datagram_queue, list_item) {
		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
		list_del(&dq_entry->list_item);
		kfree(dq_entry->dg);
		kfree(dq_entry);
	}

	/* No RCU grace period needed here: no readers can reach us. */
	list_for_each_entry_safe(notifier, tmp,
				 &context->notifier_list, node) {
		list_del(&notifier->node);
		kfree(notifier);
	}

	vmci_handle_arr_destroy(context->queue_pair_array);
	vmci_handle_arr_destroy(context->doorbell_array);
	vmci_handle_arr_destroy(context->pending_doorbell_array);
	/* Releases the notify page mapping, if one was set up. */
	vmci_ctx_unset_notify(context);
	if (context->cred)
		put_cred(context->cred);
	kfree(context);
}
 487
/*
 * Drops reference to VMCI context. If this is the last reference to
 * the context it will be deallocated. A context is created with
 * a reference count of one, and on destroy, it is removed from
 * the context list before its reference count is decremented. Thus,
 * if we reach zero, we are sure that nobody else are about to increment
 * it (they need the entry in the context list for that), and so there
 * is no need for locking.
 */
void vmci_ctx_put(struct vmci_ctx *context)
{
	/* ctx_free_ctx() tears the context down when the count hits zero. */
	kref_put(&context->kref, ctx_free_ctx);
}
 501
/*
 * Dequeues the next datagram and returns it to caller.
 * The caller passes in a pointer to the max size datagram
 * it can handle and the datagram is only unqueued if the
 * size is less than max_size. If larger, max_size is set to
 * the size of the datagram to give the caller a chance to
 * set up a larger buffer for the guestcall.
 *
 * On success the caller owns *dg and must free it.  The return value
 * is the size of the *next* still-pending datagram, or VMCI_SUCCESS
 * (0) if the queue is now empty; negative VMCI error codes otherwise.
 */
int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
			      size_t *max_size,
			      struct vmci_datagram **dg)
{
	struct vmci_datagram_queue_entry *dq_entry;
	struct list_head *list_item;
	int rv;

	/* Dequeue the next datagram entry. */
	spin_lock(&context->lock);
	if (context->pending_datagrams == 0) {
		/* Nothing pending: also drop the guest notify flag. */
		ctx_clear_notify_call(context);
		spin_unlock(&context->lock);
		pr_devel("No datagrams pending\n");
		return VMCI_ERROR_NO_MORE_DATAGRAMS;
	}

	list_item = context->datagram_queue.next;

	dq_entry =
	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);

	/* Check size of caller's buffer. */
	if (*max_size < dq_entry->dg_size) {
		/* Report the required size; the datagram stays queued. */
		*max_size = dq_entry->dg_size;
		spin_unlock(&context->lock);
		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
			 (u32) *max_size);
		return VMCI_ERROR_NO_MEM;
	}

	list_del(list_item);
	context->pending_datagrams--;
	context->datagram_queue_size -= dq_entry->dg_size;
	if (context->pending_datagrams == 0) {
		ctx_clear_notify_call(context);
		rv = VMCI_SUCCESS;
	} else {
		/*
		 * Return the size of the next datagram.
		 */
		struct vmci_datagram_queue_entry *next_entry;

		list_item = context->datagram_queue.next;
		next_entry =
		    list_entry(list_item, struct vmci_datagram_queue_entry,
			       list_item);

		/*
		 * The following size_t -> int truncation is fine as
		 * the maximum size of a (routable) datagram is 68KB.
		 */
		rv = (int)next_entry->dg_size;
	}
	spin_unlock(&context->lock);

	/* Caller must free datagram. */
	*dg = dq_entry->dg;
	dq_entry->dg = NULL;
	kfree(dq_entry);

	return rv;
}
 573
/*
 * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
 * page mapped/locked by vmci_setup_notify().
 */
void vmci_ctx_unset_notify(struct vmci_ctx *context)
{
	struct page *notify_page;

	spin_lock(&context->lock);

	/* Point back at the dummy flag before releasing the page. */
	notify_page = context->notify_page;
	context->notify = &ctx_dummy_notify;
	context->notify_page = NULL;

	spin_unlock(&context->lock);

	/* Release the page outside the spinlock. */
	if (notify_page) {
		kunmap(notify_page);
		put_page(notify_page);
	}
}
 595
 596/*
 597 * Add remote_cid to list of contexts current contexts wants
 598 * notifications from/about.
 599 */
 600int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
 601{
 602        struct vmci_ctx *context;
 603        struct vmci_handle_list *notifier, *n;
 604        int result;
 605        bool exists = false;
 606
 607        context = vmci_ctx_get(context_id);
 608        if (!context)
 609                return VMCI_ERROR_NOT_FOUND;
 610
 611        if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
 612                pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
 613                         context_id, remote_cid);
 614                result = VMCI_ERROR_DST_UNREACHABLE;
 615                goto out;
 616        }
 617
 618        if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
 619                result = VMCI_ERROR_NO_ACCESS;
 620                goto out;
 621        }
 622
 623        notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
 624        if (!notifier) {
 625                result = VMCI_ERROR_NO_MEM;
 626                goto out;
 627        }
 628
 629        INIT_LIST_HEAD(&notifier->node);
 630        notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
 631
 632        spin_lock(&context->lock);
 633
 634        list_for_each_entry(n, &context->notifier_list, node) {
 635                if (vmci_handle_is_equal(n->handle, notifier->handle)) {
 636                        exists = true;
 637                        break;
 638                }
 639        }
 640
 641        if (exists) {
 642                kfree(notifier);
 643                result = VMCI_ERROR_ALREADY_EXISTS;
 644        } else {
 645                list_add_tail_rcu(&notifier->node, &context->notifier_list);
 646                context->n_notifiers++;
 647                result = VMCI_SUCCESS;
 648        }
 649
 650        spin_unlock(&context->lock);
 651
 652 out:
 653        vmci_ctx_put(context);
 654        return result;
 655}
 656
/*
 * Remove remote_cid from current context's list of contexts it is
 * interested in getting notifications from/about.
 */
int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
{
	struct vmci_ctx *context;
	struct vmci_handle_list *notifier, *tmp;
	struct vmci_handle handle;
	bool found = false;

	context = vmci_ctx_get(context_id);
	if (!context)
		return VMCI_ERROR_NOT_FOUND;

	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);

	spin_lock(&context->lock);
	list_for_each_entry_safe(notifier, tmp,
				 &context->notifier_list, node) {
		if (vmci_handle_is_equal(notifier->handle, handle)) {
			list_del_rcu(&notifier->node);
			context->n_notifiers--;
			found = true;
			break;
		}
	}
	spin_unlock(&context->lock);

	if (found) {
		/*
		 * Wait for concurrent RCU readers of the notifier list
		 * (e.g. ctx_fire_notification()) before freeing the node.
		 */
		synchronize_rcu();
		kfree(notifier);
	}

	vmci_ctx_put(context);

	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
}
 695
/*
 * Snapshot the ids of all contexts this context subscribed to, for
 * checkpointing.  On success *pbuf (caller-owned, kfree() when done)
 * holds n_notifiers u32 ids and *buf_size their byte size; with no
 * subscriptions *pbuf is NULL and *buf_size 0.  If the supplied
 * *buf_size is too small, it is updated to the required size and
 * VMCI_ERROR_MORE_DATA is returned.
 *
 * Called with context->lock held (see vmci_ctx_get_chkpt_state()),
 * hence the GFP_ATOMIC allocation.
 */
static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
					u32 *buf_size, void **pbuf)
{
	u32 *notifiers;
	size_t data_size;
	struct vmci_handle_list *entry;
	int i = 0;

	if (context->n_notifiers == 0) {
		*buf_size = 0;
		*pbuf = NULL;
		return VMCI_SUCCESS;
	}

	data_size = context->n_notifiers * sizeof(*notifiers);
	if (*buf_size < data_size) {
		*buf_size = data_size;
		return VMCI_ERROR_MORE_DATA;
	}

	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
	if (!notifiers)
		return VMCI_ERROR_NO_MEM;

	list_for_each_entry(entry, &context->notifier_list, node)
		notifiers[i++] = entry->handle.context;

	*buf_size = data_size;
	*pbuf = notifiers;
	return VMCI_SUCCESS;
}
 727
/*
 * Snapshot the context's registered doorbell handles for
 * checkpointing.  On success *pbuf (caller-owned, kfree() when done)
 * holds one struct dbell_cpt_state per doorbell and *buf_size the
 * byte size; with no doorbells *pbuf is NULL and *buf_size 0.  If the
 * supplied *buf_size is too small, it is updated to the required size
 * and VMCI_ERROR_MORE_DATA is returned.
 *
 * Called with context->lock held (see vmci_ctx_get_chkpt_state()),
 * hence the GFP_ATOMIC allocation.
 */
static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
					u32 *buf_size, void **pbuf)
{
	struct dbell_cpt_state *dbells;
	size_t n_doorbells;
	int i;

	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
	if (n_doorbells > 0) {
		size_t data_size = n_doorbells * sizeof(*dbells);
		if (*buf_size < data_size) {
			*buf_size = data_size;
			return VMCI_ERROR_MORE_DATA;
		}

		dbells = kmalloc(data_size, GFP_ATOMIC);
		if (!dbells)
			return VMCI_ERROR_NO_MEM;

		for (i = 0; i < n_doorbells; i++)
			dbells[i].handle = vmci_handle_arr_get_entry(
						context->doorbell_array, i);

		*buf_size = data_size;
		*pbuf = dbells;
	} else {
		*buf_size = 0;
		*pbuf = NULL;
	}

	return VMCI_SUCCESS;
}
 760
/*
 * Get current context's checkpoint state of given type.
 *
 * On success the caller owns *pbuf (kfree() when done).  May return
 * VMCI_ERROR_MORE_DATA with *buf_size updated to the required size.
 */
int vmci_ctx_get_chkpt_state(u32 context_id,
			     u32 cpt_type,
			     u32 *buf_size,
			     void **pbuf)
{
	struct vmci_ctx *context;
	int result;

	context = vmci_ctx_get(context_id);
	if (!context)
		return VMCI_ERROR_NOT_FOUND;

	/* Lock held across the helpers below; they allocate GFP_ATOMIC. */
	spin_lock(&context->lock);

	switch (cpt_type) {
	case VMCI_NOTIFICATION_CPT_STATE:
		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
		break;

	case VMCI_WELLKNOWN_CPT_STATE:
		/*
		 * For compatibility with VMX'en with VM to VM communication, we
		 * always return zero wellknown handles.
		 */

		*buf_size = 0;
		*pbuf = NULL;
		result = VMCI_SUCCESS;
		break;

	case VMCI_DOORBELL_CPT_STATE:
		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
		break;

	default:
		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
		result = VMCI_ERROR_INVALID_ARGS;
		break;
	}

	spin_unlock(&context->lock);
	vmci_ctx_put(context);

	return result;
}
 809
 810/*
 811 * Set current context's checkpoint state of given type.
 812 */
 813int vmci_ctx_set_chkpt_state(u32 context_id,
 814                             u32 cpt_type,
 815                             u32 buf_size,
 816                             void *cpt_buf)
 817{
 818        u32 i;
 819        u32 current_id;
 820        int result = VMCI_SUCCESS;
 821        u32 num_ids = buf_size / sizeof(u32);
 822
 823        if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
 824                /*
 825                 * We would end up here if VMX with VM to VM communication
 826                 * attempts to restore a checkpoint with wellknown handles.
 827                 */
 828                pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
 829                return VMCI_ERROR_OBSOLETE;
 830        }
 831
 832        if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
 833                pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
 834                return VMCI_ERROR_INVALID_ARGS;
 835        }
 836
 837        for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
 838                current_id = ((u32 *)cpt_buf)[i];
 839                result = vmci_ctx_add_notification(context_id, current_id);
 840                if (result != VMCI_SUCCESS)
 841                        break;
 842        }
 843        if (result != VMCI_SUCCESS)
 844                pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
 845                         cpt_type, result);
 846
 847        return result;
 848}
 849
/*
 * Retrieves the specified context's pending notifications in the
 * form of a handle array. The handle arrays returned are the
 * actual data - not a copy and should not be modified by the
 * caller. They must be released using
 * vmci_ctx_rcv_notifications_release.
 *
 * The pending doorbell array is handed over wholesale and replaced by
 * a fresh empty array; if that allocation fails the original array is
 * reinstated and VMCI_ERROR_NO_MEM returned.  *qp_handle_array is
 * always set to NULL by this implementation.
 */
int vmci_ctx_rcv_notifications_get(u32 context_id,
				   struct vmci_handle_arr **db_handle_array,
				   struct vmci_handle_arr **qp_handle_array)
{
	struct vmci_ctx *context;
	int result = VMCI_SUCCESS;

	context = vmci_ctx_get(context_id);
	if (context == NULL)
		return VMCI_ERROR_NOT_FOUND;

	spin_lock(&context->lock);

	*db_handle_array = context->pending_doorbell_array;
	context->pending_doorbell_array = vmci_handle_arr_create(0);
	if (!context->pending_doorbell_array) {
		/* Put the old array back so no notifications are lost. */
		context->pending_doorbell_array = *db_handle_array;
		*db_handle_array = NULL;
		result = VMCI_ERROR_NO_MEM;
	}
	*qp_handle_array = NULL;

	spin_unlock(&context->lock);
	vmci_ctx_put(context);

	return result;
}
 884
 885/*
 886 * Releases handle arrays with pending notifications previously
 887 * retrieved using vmci_ctx_rcv_notifications_get. If the
 888 * notifications were not successfully handed over to the guest,
 889 * success must be false.
 890 */
 891void vmci_ctx_rcv_notifications_release(u32 context_id,
 892                                        struct vmci_handle_arr *db_handle_array,
 893                                        struct vmci_handle_arr *qp_handle_array,
 894                                        bool success)
 895{
 896        struct vmci_ctx *context = vmci_ctx_get(context_id);
 897
 898        spin_lock(&context->lock);
 899        if (!success) {
 900                struct vmci_handle handle;
 901
 902                /*
 903                 * New notifications may have been added while we were not
 904                 * holding the context lock, so we transfer any new pending
 905                 * doorbell notifications to the old array, and reinstate the
 906                 * old array.
 907                 */
 908
 909                handle = vmci_handle_arr_remove_tail(
 910                                        context->pending_doorbell_array);
 911                while (!vmci_handle_is_invalid(handle)) {
 912                        if (!vmci_handle_arr_has_entry(db_handle_array,
 913                                                       handle)) {
 914                                vmci_handle_arr_append_entry(
 915                                                &db_handle_array, handle);
 916                        }
 917                        handle = vmci_handle_arr_remove_tail(
 918                                        context->pending_doorbell_array);
 919                }
 920                vmci_handle_arr_destroy(context->pending_doorbell_array);
 921                context->pending_doorbell_array = db_handle_array;
 922                db_handle_array = NULL;
 923        } else {
 924                ctx_clear_notify_call(context);
 925        }
 926        spin_unlock(&context->lock);
 927        vmci_ctx_put(context);
 928
 929        if (db_handle_array)
 930                vmci_handle_arr_destroy(db_handle_array);
 931
 932        if (qp_handle_array)
 933                vmci_handle_arr_destroy(qp_handle_array);
 934}
 935
 936/*
 937 * Registers that a new doorbell handle has been allocated by the
 938 * context. Only doorbell handles registered can be notified.
 939 */
 940int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
 941{
 942        struct vmci_ctx *context;
 943        int result;
 944
 945        if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
 946                return VMCI_ERROR_INVALID_ARGS;
 947
 948        context = vmci_ctx_get(context_id);
 949        if (context == NULL)
 950                return VMCI_ERROR_NOT_FOUND;
 951
 952        spin_lock(&context->lock);
 953        if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
 954                vmci_handle_arr_append_entry(&context->doorbell_array, handle);
 955                result = VMCI_SUCCESS;
 956        } else {
 957                result = VMCI_ERROR_DUPLICATE_ENTRY;
 958        }
 959
 960        spin_unlock(&context->lock);
 961        vmci_ctx_put(context);
 962
 963        return result;
 964}
 965
 966/*
 967 * Unregisters a doorbell handle that was previously registered
 968 * with vmci_ctx_dbell_create.
 969 */
 970int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
 971{
 972        struct vmci_ctx *context;
 973        struct vmci_handle removed_handle;
 974
 975        if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
 976                return VMCI_ERROR_INVALID_ARGS;
 977
 978        context = vmci_ctx_get(context_id);
 979        if (context == NULL)
 980                return VMCI_ERROR_NOT_FOUND;
 981
 982        spin_lock(&context->lock);
 983        removed_handle =
 984            vmci_handle_arr_remove_entry(context->doorbell_array, handle);
 985        vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
 986        spin_unlock(&context->lock);
 987
 988        vmci_ctx_put(context);
 989
 990        return vmci_handle_is_invalid(removed_handle) ?
 991            VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
 992}
 993
 994/*
 995 * Unregisters all doorbell handles that were previously
 996 * registered with vmci_ctx_dbell_create.
 997 */
 998int vmci_ctx_dbell_destroy_all(u32 context_id)
 999{
1000        struct vmci_ctx *context;
1001        struct vmci_handle handle;
1002
1003        if (context_id == VMCI_INVALID_ID)
1004                return VMCI_ERROR_INVALID_ARGS;
1005
1006        context = vmci_ctx_get(context_id);
1007        if (context == NULL)
1008                return VMCI_ERROR_NOT_FOUND;
1009
1010        spin_lock(&context->lock);
1011        do {
1012                struct vmci_handle_arr *arr = context->doorbell_array;
1013                handle = vmci_handle_arr_remove_tail(arr);
1014        } while (!vmci_handle_is_invalid(handle));
1015        do {
1016                struct vmci_handle_arr *arr = context->pending_doorbell_array;
1017                handle = vmci_handle_arr_remove_tail(arr);
1018        } while (!vmci_handle_is_invalid(handle));
1019        spin_unlock(&context->lock);
1020
1021        vmci_ctx_put(context);
1022
1023        return VMCI_SUCCESS;
1024}
1025
1026/*
1027 * Registers a notification of a doorbell handle initiated by the
1028 * specified source context. The notification of doorbells are
1029 * subject to the same isolation rules as datagram delivery. To
1030 * allow host side senders of notifications a finer granularity
1031 * of sender rights than those assigned to the sending context
1032 * itself, the host context is required to specify a different
1033 * set of privilege flags that will override the privileges of
1034 * the source context.
1035 */
1036int vmci_ctx_notify_dbell(u32 src_cid,
1037                          struct vmci_handle handle,
1038                          u32 src_priv_flags)
1039{
1040        struct vmci_ctx *dst_context;
1041        int result;
1042
1043        if (vmci_handle_is_invalid(handle))
1044                return VMCI_ERROR_INVALID_ARGS;
1045
1046        /* Get the target VM's VMCI context. */
1047        dst_context = vmci_ctx_get(handle.context);
1048        if (!dst_context) {
1049                pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1050                return VMCI_ERROR_NOT_FOUND;
1051        }
1052
1053        if (src_cid != handle.context) {
1054                u32 dst_priv_flags;
1055
1056                if (VMCI_CONTEXT_IS_VM(src_cid) &&
1057                    VMCI_CONTEXT_IS_VM(handle.context)) {
1058                        pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1059                                 src_cid, handle.context);
1060                        result = VMCI_ERROR_DST_UNREACHABLE;
1061                        goto out;
1062                }
1063
1064                result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1065                if (result < VMCI_SUCCESS) {
1066                        pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1067                                handle.context, handle.resource);
1068                        goto out;
1069                }
1070
1071                if (src_cid != VMCI_HOST_CONTEXT_ID ||
1072                    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1073                        src_priv_flags = vmci_context_get_priv_flags(src_cid);
1074                }
1075
1076                if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1077                        result = VMCI_ERROR_NO_ACCESS;
1078                        goto out;
1079                }
1080        }
1081
1082        if (handle.context == VMCI_HOST_CONTEXT_ID) {
1083                result = vmci_dbell_host_context_notify(src_cid, handle);
1084        } else {
1085                spin_lock(&dst_context->lock);
1086
1087                if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1088                                               handle)) {
1089                        result = VMCI_ERROR_NOT_FOUND;
1090                } else {
1091                        if (!vmci_handle_arr_has_entry(
1092                                        dst_context->pending_doorbell_array,
1093                                        handle)) {
1094                                vmci_handle_arr_append_entry(
1095                                        &dst_context->pending_doorbell_array,
1096                                        handle);
1097
1098                                ctx_signal_notify(dst_context);
1099                                wake_up(&dst_context->host_context.wait_queue);
1100
1101                        }
1102                        result = VMCI_SUCCESS;
1103                }
1104                spin_unlock(&dst_context->lock);
1105        }
1106
1107 out:
1108        vmci_ctx_put(dst_context);
1109
1110        return result;
1111}
1112
1113bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1114{
1115        return context && context->user_version >= VMCI_VERSION_HOSTQP;
1116}
1117
1118/*
1119 * Registers that a new queue pair handle has been allocated by
1120 * the context.
1121 */
1122int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1123{
1124        int result;
1125
1126        if (context == NULL || vmci_handle_is_invalid(handle))
1127                return VMCI_ERROR_INVALID_ARGS;
1128
1129        if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
1130                vmci_handle_arr_append_entry(&context->queue_pair_array,
1131                                             handle);
1132                result = VMCI_SUCCESS;
1133        } else {
1134                result = VMCI_ERROR_DUPLICATE_ENTRY;
1135        }
1136
1137        return result;
1138}
1139
1140/*
1141 * Unregisters a queue pair handle that was previously registered
1142 * with vmci_ctx_qp_create.
1143 */
1144int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1145{
1146        struct vmci_handle hndl;
1147
1148        if (context == NULL || vmci_handle_is_invalid(handle))
1149                return VMCI_ERROR_INVALID_ARGS;
1150
1151        hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1152
1153        return vmci_handle_is_invalid(hndl) ?
1154                VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1155}
1156
1157/*
1158 * Determines whether a given queue pair handle is registered
1159 * with the given context.
1160 */
1161bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1162{
1163        if (context == NULL || vmci_handle_is_invalid(handle))
1164                return false;
1165
1166        return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1167}
1168
1169/*
1170 * vmci_context_get_priv_flags() - Retrieve privilege flags.
1171 * @context_id: The context ID of the VMCI context.
1172 *
1173 * Retrieves privilege flags of the given VMCI context ID.
1174 */
1175u32 vmci_context_get_priv_flags(u32 context_id)
1176{
1177        if (vmci_host_code_active()) {
1178                u32 flags;
1179                struct vmci_ctx *context;
1180
1181                context = vmci_ctx_get(context_id);
1182                if (!context)
1183                        return VMCI_LEAST_PRIVILEGE_FLAGS;
1184
1185                flags = context->priv_flags;
1186                vmci_ctx_put(context);
1187                return flags;
1188        }
1189        return VMCI_NO_PRIVILEGE_FLAGS;
1190}
1191EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1192
1193/*
1194 * vmci_is_context_owner() - Determimnes if user is the context owner
1195 * @context_id: The context ID of the VMCI context.
1196 * @uid:        The host user id (real kernel value).
1197 *
1198 * Determines whether a given UID is the owner of given VMCI context.
1199 */
1200bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1201{
1202        bool is_owner = false;
1203
1204        if (vmci_host_code_active()) {
1205                struct vmci_ctx *context = vmci_ctx_get(context_id);
1206                if (context) {
1207                        if (context->cred)
1208                                is_owner = uid_eq(context->cred->uid, uid);
1209                        vmci_ctx_put(context);
1210                }
1211        }
1212
1213        return is_owner;
1214}
1215EXPORT_SYMBOL_GPL(vmci_is_context_owner);
1216