linux/drivers/block/xen-blkback/common.h
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

extern unsigned int xen_blkif_max_ring_order;
extern unsigned int xenblk_max_queues;
/*
 * The maximum number of segments allowed in an indirect request. This
 * value is also advertised to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256
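
/*
 * Illustrative sketch only (not part of the original header): one way a
 * backend could advertise the limit above is a xenstore write of the
 * "feature-max-indirect-segments" key mentioned further below. The helper
 * name and the non-transactional XBT_NIL usage here are hypothetical.
 */
static inline int example_advertise_max_indirect(struct xenbus_device *dev)
{
        return xenbus_printf(XBT_NIL, dev->nodename,
                             "feature-max-indirect-segments", "%u",
                             MAX_INDIRECT_SEGMENTS);
}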

/*
 * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
 * Number of Xen pages per segment:
 */
#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)

#define XEN_PAGES_PER_INDIRECT_FRAME \
        (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
#define SEGS_PER_INDIRECT_FRAME \
        (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)

#define MAX_INDIRECT_PAGES \
        ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
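
/*
 * Worked example (illustrative, assuming 4 KiB Xen pages and
 * sizeof(struct blkif_request_segment) == 8):
 *
 *   - 4 KiB guest pages:  XEN_PAGES_PER_SEGMENT = 1,
 *     XEN_PAGES_PER_INDIRECT_FRAME = 4096 / 8 = 512,
 *     SEGS_PER_INDIRECT_FRAME = 512, so MAX_INDIRECT_PAGES = 1.
 *   - 64 KiB guest pages: XEN_PAGES_PER_SEGMENT = 16,
 *     SEGS_PER_INDIRECT_FRAME = 512 / 16 = 32, so MAX_INDIRECT_PAGES = 8.
 */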

/* Not a real protocol.  Used to generate ring structs which contain
 * the elements common to all protocols only.  This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places.  */
struct blkif_common_request {
        char dummy;
};

/* i386 protocol version */

struct blkif_x86_32_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_32_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_32_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_32_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad1;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS; this value
         * is also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint64_t       _pad2;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_32_request_rw rw;
                struct blkif_x86_32_request_discard discard;
                struct blkif_x86_32_request_other other;
                struct blkif_x86_32_request_indirect indirect;
        } u;
} __attribute__((__packed__));

/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_64_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_64_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_64_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8  */
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad2;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS; this value
         * is also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint32_t       _pad3;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_64_request_rw rw;
                struct blkif_x86_64_request_discard discard;
                struct blkif_x86_64_request_other other;
                struct blkif_x86_64_request_indirect indirect;
        } u;
} __attribute__((__packed__));

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
                  struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
                  struct blkif_response);

union blkif_back_rings {
        struct blkif_back_ring        native;
        struct blkif_common_back_ring common;
        struct blkif_x86_32_back_ring x86_32;
        struct blkif_x86_64_back_ring x86_64;
};

enum blkif_protocol {
        BLKIF_PROTOCOL_NATIVE = 1,
        BLKIF_PROTOCOL_X86_32 = 2,
        BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * Default protocol if the frontend doesn't specify one.
 */
#ifdef CONFIG_X86
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
#else
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
#endif
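
/*
 * Illustrative sketch only: the protocol is normally negotiated by reading
 * the frontend's "protocol" xenstore node and comparing it against the ABI
 * names from <xen/interface/io/protocols.h>, with an absent node falling
 * back to BLKIF_PROTOCOL_DEFAULT. The helper below is hypothetical; the
 * real backend also rejects unknown protocol strings.
 */
static inline enum blkif_protocol
example_negotiate_protocol(struct xenbus_device *dev)
{
        char protocol[64];

        if (xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
                         "%63s", protocol) <= 0)
                return BLKIF_PROTOCOL_DEFAULT;
        if (!strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
                return BLKIF_PROTOCOL_NATIVE;
        if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
                return BLKIF_PROTOCOL_X86_32;
        if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
                return BLKIF_PROTOCOL_X86_64;
        return BLKIF_PROTOCOL_DEFAULT;
}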

struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
        /* Non-zero -> read-only */
        unsigned char           readonly;
        /* VDISK_xxx */
        unsigned char           type;
        /* phys device that this vbd maps to. */
        u32                     pdevice;
        struct block_device     *bdev;
        /* Cached size parameter. */
        sector_t                size;
        unsigned int            flush_support:1;
        unsigned int            discard_secure:1;
        unsigned int            feature_gnt_persistent:1;
        unsigned int            overflow_max_grants:1;
};

struct backend_info;

/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE         32
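
/*
 * Rough arithmetic behind the value above (illustrative): a native
 * blkif_request is 112 bytes, so a 4 KiB ring page minus the shared-ring
 * header holds about 36 entries, which the ring macros round down to the
 * nearest power of two, i.e. 32 requests per ring page.
 */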

struct persistent_gnt {
        struct page *page;
        grant_ref_t gnt;
        grant_handle_t handle;
        unsigned long last_used;
        bool active;
        struct rb_node node;
        struct list_head remove_node;
};
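
/*
 * Illustrative sketch only: persistent grants are kept in an rb-tree keyed
 * by grant reference, so a lookup is a plain binary-tree walk comparing
 * 'gnt'. The helper name below is hypothetical.
 */
static inline struct persistent_gnt *
example_find_persistent_gnt(struct rb_root *root, grant_ref_t gref)
{
        struct rb_node *node = root->rb_node;

        while (node) {
                struct persistent_gnt *pgnt =
                        container_of(node, struct persistent_gnt, node);

                if (gref < pgnt->gnt)
                        node = node->rb_left;
                else if (gref > pgnt->gnt)
                        node = node->rb_right;
                else
                        return pgnt;
        }
        return NULL;
}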

/* Per-ring information. */
struct xen_blkif_ring {
        /* Physical parameters of the comms window. */
        unsigned int            irq;
        union blkif_back_rings  blk_rings;
        void                    *blk_ring;
        /* Private fields. */
        spinlock_t              blk_ring_lock;

        wait_queue_head_t       wq;
        atomic_t                inflight;
        bool                    active;
        /* One thread per blkif ring. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;

        /* List of all available 'pending_req' structures. */
        struct list_head        pending_free;
        /* And its spinlock. */
        spinlock_t              pending_free_lock;
        wait_queue_head_t       pending_free_wq;

        /* Tree to store persistent grants. */
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
        atomic_t                persistent_gnt_in_use;
        unsigned long           next_lru;

        /* Statistics. */
        unsigned long           st_print;
        unsigned long long      st_rd_req;
        unsigned long long      st_wr_req;
        unsigned long long      st_oo_req;
        unsigned long long      st_f_req;
        unsigned long long      st_ds_req;
        unsigned long long      st_rd_sect;
        unsigned long long      st_wr_sect;

        /* Used by the kworker that offloads work from the persistent purge. */
        struct list_head        persistent_purge_list;
        struct work_struct      persistent_purge_work;

        /* Buffer of free pages to map grant refs. */
        struct gnttab_page_cache free_pages;

        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
        struct xen_blkif        *blkif;
};
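
/*
 * Illustrative sketch only: each ring has one xenblkd kthread that sleeps
 * on 'wq' until the interrupt handler flags 'waiting_reqs' or the thread
 * is asked to stop. A minimal wait step could look like this (hypothetical
 * helper; it would also need <linux/kthread.h> for kthread_should_stop()).
 */
static inline int example_wait_for_ring_work(struct xen_blkif_ring *ring)
{
        return wait_event_interruptible(ring->wq,
                        ring->waiting_reqs || kthread_should_stop());
}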

struct xen_blkif {
        /* Unique identifier for this interface. */
        domid_t                 domid;
        unsigned int            handle;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
        /* The VBD attached to this interface. */
        struct xen_vbd          vbd;
        /* Back pointer to the backend_info. */
        struct backend_info     *be;
        atomic_t                refcnt;
        /* for barrier (drain) requests */
        struct completion       drain_complete;
        atomic_t                drain;

        struct work_struct      free_work;
        unsigned int            nr_ring_pages;
        bool                    multi_ref;
        /* All rings for this device. */
        struct xen_blkif_ring   *rings;
        unsigned int            nr_rings;
        unsigned long           buffer_squeeze_end;
};

struct seg_buf {
        unsigned long offset;
        unsigned int nsec;
};

struct grant_page {
        struct page             *page;
        struct persistent_gnt   *persistent_gnt;
        grant_handle_t          handle;
        grant_ref_t             gref;
};

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
        struct xen_blkif_ring   *ring;
        u64                     id;
        int                     nr_segs;
        atomic_t                pendcnt;
        unsigned short          operation;
        int                     status;
        struct list_head        free_list;
        struct grant_page       *segments[MAX_INDIRECT_SEGMENTS];
        /* Indirect descriptors */
        struct grant_page       *indirect_pages[MAX_INDIRECT_PAGES];
        struct seg_buf          seg[MAX_INDIRECT_SEGMENTS];
        struct bio              *biolist[MAX_INDIRECT_SEGMENTS];
        struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
        struct page                   *unmap_pages[MAX_INDIRECT_SEGMENTS];
        struct gntab_unmap_queue_data gnttab_unmap_data;
};
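
/*
 * Illustrative sketch only: the completion pattern the comment above
 * describes boils down to an atomic decrement of 'pendcnt'; whoever drops
 * it to zero queues the response. Helper and callback names here are
 * hypothetical.
 */
static inline void example_bio_done(struct pending_req *pending_req,
                                    int error)
{
        if (error)
                pending_req->status = BLKIF_RSP_ERROR;

        /* Last outstanding bio for this request: queue the response. */
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
                /* e.g. make_response(pending_req->ring, pending_req->id, ...); */
        }
}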


#define vbd_sz(_v)      bdev_nr_sectors((_v)->bdev)

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)                               \
        do {                                            \
                if (atomic_dec_and_test(&(_b)->refcnt)) \
                        schedule_work(&(_b)->free_work);\
        } while (0)
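
/*
 * Usage sketch (illustrative): xen_blkif_get()/xen_blkif_put() bracket any
 * work that holds on to the interface; the final put schedules 'free_work'
 * to tear the structure down. The function below is hypothetical.
 */
static inline void example_hand_off_blkif(struct xen_blkif *blkif)
{
        xen_blkif_get(blkif);
        /* ... hand 'blkif' to asynchronous work here ... */
        xen_blkif_put(blkif);           /* may schedule blkif->free_work */
}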

struct phys_req {
        unsigned short          dev;
        blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
};

int xen_blkif_interface_init(void);
void xen_blkif_interface_fini(void);

int xen_blkif_xenbus_init(void);
void xen_blkif_xenbus_fini(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif_ring *ring);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
                      struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                                        struct blkif_x86_32_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                                        struct blkif_x86_64_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}
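
/*
 * Illustrative sketch only: a consumer typically switches on blk_protocol,
 * copies the request off the shared ring with RING_COPY_REQUEST() and, for
 * the 32/64-bit ABIs, converts it into the native layout with the helpers
 * above. The function name below is hypothetical.
 */
static inline void example_copy_request(struct xen_blkif_ring *ring,
                                        RING_IDX rc,
                                        struct blkif_request *req)
{
        union blkif_back_rings *rings = &ring->blk_rings;

        switch (ring->blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                RING_COPY_REQUEST(&rings->native, rc, req);
                break;
        case BLKIF_PROTOCOL_X86_32: {
                struct blkif_x86_32_request req32;

                RING_COPY_REQUEST(&rings->x86_32, rc, &req32);
                blkif_get_x86_32_req(req, &req32);
                break;
        }
        case BLKIF_PROTOCOL_X86_64: {
                struct blkif_x86_64_request req64;

                RING_COPY_REQUEST(&rings->x86_64, rc, &req64);
                blkif_get_x86_64_req(req, &req64);
                break;
        }
        }
}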

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */