linux/drivers/block/xen-blkback/common.h
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

extern unsigned int xen_blkif_max_ring_order;
extern unsigned int xenblk_max_queues;
/*
 * This is the maximum number of segments allowed in an indirect request.
 * This value is also advertised to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256

/*
 * Xen uses 4K pages, while the guest may use a different page size (4K or
 * 64K). XEN_PAGES_PER_SEGMENT is the number of Xen pages per segment.
 */
#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)

#define XEN_PAGES_PER_INDIRECT_FRAME \
        (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
#define SEGS_PER_INDIRECT_FRAME \
        (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)

#define MAX_INDIRECT_PAGES \
        ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)

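/*
 * Worked example (editorial illustration, not part of the ABI): with
 * XEN_PAGE_SIZE of 4K and assuming sizeof(struct blkif_request_segment) == 8,
 * XEN_PAGES_PER_INDIRECT_FRAME is 4096 / 8 = 512.  On a 4K-page guest
 * (XEN_PAGES_PER_SEGMENT == 1) that gives SEGS_PER_INDIRECT_FRAME == 512 and
 * MAX_INDIRECT_PAGES == (256 + 511) / 512 == 1; on a 64K-page guest
 * (XEN_PAGES_PER_SEGMENT == 16) it gives SEGS_PER_INDIRECT_FRAME == 32 and
 * MAX_INDIRECT_PAGES == (256 + 31) / 32 == 8.
 */
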
/* Not a real protocol.  Used to generate ring structs which contain
 * the elements common to all protocols only.  This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places.  */
struct blkif_common_request {
        char dummy;
};
struct blkif_common_response {
        char dummy;
};

struct blkif_x86_32_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_32_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_32_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_32_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad1;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS. This value
         * is also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint64_t       _pad2;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_32_request_rw rw;
                struct blkif_x86_32_request_discard discard;
                struct blkif_x86_32_request_other other;
                struct blkif_x86_32_request_indirect indirect;
        } u;
} __attribute__((__packed__));

/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_response {
        uint64_t        id;              /* copied from request */
        uint8_t         operation;       /* copied from request */
        int16_t         status;          /* BLKIF_RSP_???       */
};
#pragma pack(pop)
/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_64_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_64_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_64_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8  */
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad2;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS. This value
         * is also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint32_t       _pad3;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_64_request_rw rw;
                struct blkif_x86_64_request_discard discard;
                struct blkif_x86_64_request_other other;
                struct blkif_x86_64_request_indirect indirect;
        } u;
} __attribute__((__packed__));

struct blkif_x86_64_response {
        uint64_t       __attribute__((__aligned__(8))) id;
        uint8_t         operation;       /* copied from request */
        int16_t         status;          /* BLKIF_RSP_???       */
};

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_common_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
                  struct blkif_x86_32_response);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
                  struct blkif_x86_64_response);

union blkif_back_rings {
        struct blkif_back_ring        native;
        struct blkif_common_back_ring common;
        struct blkif_x86_32_back_ring x86_32;
        struct blkif_x86_64_back_ring x86_64;
};

enum blkif_protocol {
        BLKIF_PROTOCOL_NATIVE = 1,
        BLKIF_PROTOCOL_X86_32 = 2,
        BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * Default protocol if the frontend doesn't specify one.
 */
#ifdef CONFIG_X86
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
#else
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
#endif
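
/*
 * Illustrative sketch (an editorial example, not the driver's code; the real
 * attach path lives in xenbus.c): once the frontend's shared ring page(s)
 * have been mapped at @shared, the backend initializes whichever member of
 * the union matches the negotiated ABI.
 */
static inline void example_back_ring_init(union blkif_back_rings *rings,
                                          void *shared,
                                          enum blkif_protocol protocol,
                                          unsigned long size)
{
        switch (protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                BACK_RING_INIT(&rings->native,
                               (struct blkif_sring *)shared, size);
                break;
        case BLKIF_PROTOCOL_X86_32:
                BACK_RING_INIT(&rings->x86_32,
                               (struct blkif_x86_32_sring *)shared, size);
                break;
        case BLKIF_PROTOCOL_X86_64:
                BACK_RING_INIT(&rings->x86_64,
                               (struct blkif_x86_64_sring *)shared, size);
                break;
        }
}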

struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
        /* Non-zero -> read-only */
        unsigned char           readonly;
        /* VDISK_xxx */
        unsigned char           type;
        /* phys device that this vbd maps to. */
        u32                     pdevice;
        struct block_device     *bdev;
        /* Cached size parameter. */
        sector_t                size;
        unsigned int            flush_support:1;
        unsigned int            discard_secure:1;
        unsigned int            feature_gnt_persistent:1;
        unsigned int            overflow_max_grants:1;
};

struct backend_info;

/* Number of available flags */
#define PERSISTENT_GNT_FLAGS_SIZE       2
/* This persistent grant is currently in use */
#define PERSISTENT_GNT_ACTIVE           0
/*
 * This persistent grant has been used recently. The flag is set when
 * PERSISTENT_GNT_ACTIVE is cleared, so we know that this grant was in
 * use not long ago.
 */
#define PERSISTENT_GNT_WAS_ACTIVE       1

/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE         32

struct persistent_gnt {
        struct page *page;
        grant_ref_t gnt;
        grant_handle_t handle;
        DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
        struct rb_node node;
        struct list_head remove_node;
};
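
/*
 * Illustrative sketch (an editorial example, not the driver's code): the
 * per-ring persistent_gnts rb-tree is keyed by grant reference, so a lookup
 * walks the tree comparing @gref against persistent_gnt->gnt. The real
 * lookup (which also checks PERSISTENT_GNT_ACTIVE) lives in blkback.c.
 */
static inline struct persistent_gnt *
example_find_persistent_gnt(struct rb_root *root, grant_ref_t gref)
{
        struct rb_node *node = root->rb_node;

        while (node) {
                struct persistent_gnt *data =
                        rb_entry(node, struct persistent_gnt, node);

                if (gref < data->gnt)
                        node = node->rb_left;
                else if (gref > data->gnt)
                        node = node->rb_right;
                else
                        return data;
        }
        return NULL;
}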

/* Per-ring information. */
struct xen_blkif_ring {
        /* Physical parameters of the comms window. */
        unsigned int            irq;
        union blkif_back_rings  blk_rings;
        void                    *blk_ring;
        /* Private fields. */
        spinlock_t              blk_ring_lock;

        wait_queue_head_t       wq;
        atomic_t                inflight;
        /* One thread per blkif ring. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;

        /* List of all 'pending_req' available */
        struct list_head        pending_free;
        /* And its spinlock. */
        spinlock_t              pending_free_lock;
        wait_queue_head_t       pending_free_wq;

        /* Tree to store persistent grants. */
        spinlock_t              pers_gnts_lock;
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
        atomic_t                persistent_gnt_in_use;
        unsigned long           next_lru;

        /* Statistics. */
        unsigned long           st_print;
        unsigned long long      st_rd_req;
        unsigned long long      st_wr_req;
        unsigned long long      st_oo_req;
        unsigned long long      st_f_req;
        unsigned long long      st_ds_req;
        unsigned long long      st_rd_sect;
        unsigned long long      st_wr_sect;

        /* Used by the kworker that offloads work from the persistent purge. */
        struct list_head        persistent_purge_list;
        struct work_struct      persistent_purge_work;

        /* Buffer of free pages to map grant refs. */
        spinlock_t              free_pages_lock;
        int                     free_pages_num;
        struct list_head        free_pages;

        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
        struct xen_blkif        *blkif;
};

struct xen_blkif {
        /* Unique identifier for this interface. */
        domid_t                 domid;
        unsigned int            handle;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
        /* The VBD attached to this interface. */
        struct xen_vbd          vbd;
        /* Back pointer to the backend_info. */
        struct backend_info     *be;
        atomic_t                refcnt;
        /* for barrier (drain) requests */
        struct completion       drain_complete;
        atomic_t                drain;

        struct work_struct      free_work;
        unsigned int            nr_ring_pages;
        /* All rings for this device. */
        struct xen_blkif_ring   *rings;
        unsigned int            nr_rings;
};

struct seg_buf {
        unsigned long offset;
        unsigned int nsec;
};

struct grant_page {
        struct page             *page;
        struct persistent_gnt   *persistent_gnt;
        grant_handle_t          handle;
        grant_ref_t             gref;
};

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
        struct xen_blkif_ring   *ring;
        u64                     id;
        int                     nr_segs;
        atomic_t                pendcnt;
        unsigned short          operation;
        int                     status;
        struct list_head        free_list;
        struct grant_page       *segments[MAX_INDIRECT_SEGMENTS];
        /* Indirect descriptors */
        struct grant_page       *indirect_pages[MAX_INDIRECT_PAGES];
        struct seg_buf          seg[MAX_INDIRECT_SEGMENTS];
        struct bio              *biolist[MAX_INDIRECT_SEGMENTS];
        struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
        struct page                   *unmap_pages[MAX_INDIRECT_SEGMENTS];
        struct gntab_unmap_queue_data gnttab_unmap_data;
};
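
/*
 * Editorial note (summary, not extra API): a pending_req is taken from
 * ring->pending_free, pendcnt is set to the number of bios submitted, and
 * each bio completion decrements it; the final decrement is what triggers
 * unmapping the grants and queueing the response with the saved 'id'.
 */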

#define vbd_sz(_v)      ((_v)->bdev->bd_part ? \
                         (_v)->bdev->bd_part->nr_sects : \
                          get_capacity((_v)->bdev->bd_disk))

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)                               \
        do {                                            \
                if (atomic_dec_and_test(&(_b)->refcnt)) \
                        schedule_work(&(_b)->free_work);\
        } while (0)
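
/*
 * Illustrative usage (editorial sketch, not code from the driver): callers
 * pair these like
 *
 *      xen_blkif_get(blkif);
 *      ... service requests ...
 *      xen_blkif_put(blkif);
 *
 * The final put schedules blkif->free_work instead of freeing inline, so the
 * last reference may safely be dropped from atomic context.
 */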

struct phys_req {
        unsigned short          dev;
        blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
};
int xen_blkif_interface_init(void);

int xen_blkif_xenbus_init(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif_ring *ring);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
                      struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                                        struct blkif_x86_32_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                                        struct blkif_x86_64_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}
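
/*
 * Illustrative sketch (an editorial example, not the driver's code; the real
 * request loop lives in blkback.c): a consumer picks the translation helper
 * that matches the negotiated protocol when copying one request off the
 * shared ring at consumer index @rc.
 */
static inline void example_copy_request(struct xen_blkif_ring *ring,
                                        struct blkif_request *req,
                                        RING_IDX rc)
{
        union blkif_back_rings *blk_rings = &ring->blk_rings;

        switch (ring->blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                *req = *RING_GET_REQUEST(&blk_rings->native, rc);
                break;
        case BLKIF_PROTOCOL_X86_32:
                blkif_get_x86_32_req(req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
                break;
        case BLKIF_PROTOCOL_X86_64:
                blkif_get_x86_64_req(req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
                break;
        }
}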

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */