linux/drivers/block/xen-blkback/common.h
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

extern unsigned int xen_blkif_max_ring_order;
extern unsigned int xenblk_max_queues;
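/*
 * Sketch of what these bound (the definitions, and most likely the module
 * parameters that set them, live in xen-blkback's blkback.c rather than
 * here): xen_blkif_max_ring_order limits the order (number of pages) of the
 * request ring a frontend may negotiate, and xenblk_max_queues caps the
 * number of rings (queues) used per device.
 */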
/*
 * This is the maximum number of segments that would be allowed in indirect
 * requests. This value will also be passed to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256

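/*
 * The frontend learns about this limit from the backend's xenstore
 * directory, through the "feature-max-indirect-segments" node mentioned
 * further down.  An illustrative entry (the exact path layout may vary)
 * would look like:
 *
 *      .../backend/vbd/<frontend-domid>/<handle>/feature-max-indirect-segments = "256"
 */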
/*
 * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
 * Number of Xen pages per segment:
 */
#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)

#define XEN_PAGES_PER_INDIRECT_FRAME \
        (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
#define SEGS_PER_INDIRECT_FRAME \
        (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)

#define MAX_INDIRECT_PAGES \
        ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)

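/*
 * Worked example, assuming 4K pages on both sides (so XEN_PAGES_PER_SEGMENT
 * is 1) and sizeof(struct blkif_request_segment) == 8:
 *
 *      XEN_PAGES_PER_INDIRECT_FRAME = 4096 / 8 = 512 entries per frame
 *      SEGS_PER_INDIRECT_FRAME      = 512 / 1  = 512 segments per frame
 *      MAX_INDIRECT_PAGES           = DIV_ROUND_UP(256, 512) = 1
 *
 * With 64K guest pages XEN_PAGES_PER_SEGMENT is 16, each segment consumes 16
 * entries of an indirect frame, SEGS_PER_INDIRECT_FRAME drops to 32 and
 * MAX_INDIRECT_PAGES grows to 8.
 */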
/* Not a real protocol.  Used to generate ring structs which contain
 * only the elements common to all protocols.  This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places.  */
struct blkif_common_request {
        char dummy;
};

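/*
 * For instance (a sketch, not a verbatim copy of blkback.c): the request
 * dispatch loop only needs the shared producer index before it knows which
 * ABI the frontend speaks, so it can go through the common view of the
 * backend ring union declared below, roughly:
 *
 *      RING_IDX rp = ring->blk_rings.common.sring->req_prod;
 *      rmb();
 *
 * and then consume requests up to rp, instead of switching on
 * blkif->blk_protocol just to read an index.
 */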
/* i386 protocol version */

struct blkif_x86_32_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_32_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_32_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_32_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad1;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS; this value
         * is also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint64_t       _pad2;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_32_request_rw rw;
                struct blkif_x86_32_request_discard discard;
                struct blkif_x86_32_request_other other;
                struct blkif_x86_32_request_indirect indirect;
        } u;
} __attribute__((__packed__));

/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_64_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_64_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_64_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad2;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS; this value
         * is also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint32_t       _pad3;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_64_request_rw rw;
                struct blkif_x86_64_request_discard discard;
                struct blkif_x86_64_request_other other;
                struct blkif_x86_64_request_indirect indirect;
        } u;
} __attribute__((__packed__));

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
                  struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
                  struct blkif_response);
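/*
 * A brief reminder of what DEFINE_RING_TYPES() from <xen/interface/io/ring.h>
 * generates for each name above: a shared-ring struct (blkif_*_sring), a
 * frontend view (blkif_*_front_ring) and a backend view (blkif_*_back_ring).
 * Only the backend views are used here, collected in the union below.
 */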

union blkif_back_rings {
        struct blkif_back_ring        native;
        struct blkif_common_back_ring common;
        struct blkif_x86_32_back_ring x86_32;
        struct blkif_x86_64_back_ring x86_64;
};

enum blkif_protocol {
        BLKIF_PROTOCOL_NATIVE = 1,
        BLKIF_PROTOCOL_X86_32 = 2,
        BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * Default protocol if the frontend doesn't specify one.
 */
#ifdef CONFIG_X86
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
#else
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
#endif
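/*
 * How the default is used (a sketch of the negotiation in xenbus.c, not a
 * verbatim copy): the frontend may write a "protocol" node containing one of
 * the XEN_IO_PROTO_ABI_* strings from <xen/interface/io/protocols.h>; if the
 * node is absent the backend keeps BLKIF_PROTOCOL_DEFAULT, roughly:
 *
 *      blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
 *      if (!strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
 *              blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
 *      else if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
 *              blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
 *      else if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
 *              blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
 */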

struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
        /* Non-zero -> read-only */
        unsigned char           readonly;
        /* VDISK_xxx */
        unsigned char           type;
        /* phys device that this vbd maps to. */
        u32                     pdevice;
        struct block_device     *bdev;
        /* Cached size parameter. */
        sector_t                size;
        unsigned int            flush_support:1;
        unsigned int            discard_secure:1;
        unsigned int            feature_gnt_persistent:1;
        unsigned int            overflow_max_grants:1;
};

struct backend_info;

/* Number of requests that we can fit in a single ring page. */
#define XEN_BLKIF_REQS_PER_PAGE         32
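/*
 * Sketch of how this is used (see the ring mapping code in xenbus.c): the
 * per-ring pending_free pool is populated with
 * nr_ring_pages * XEN_BLKIF_REQS_PER_PAGE pending_req structures, so a
 * larger multi-page ring gets a proportionally larger pool of in-flight
 * requests.
 */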

struct persistent_gnt {
        struct page *page;
        grant_ref_t gnt;
        grant_handle_t handle;
        unsigned long last_used;
        bool active;
        struct rb_node node;
        struct list_head remove_node;
};

/* Per-ring information. */
struct xen_blkif_ring {
        /* Physical parameters of the comms window. */
        unsigned int            irq;
        union blkif_back_rings  blk_rings;
        void                    *blk_ring;
        /* Private fields. */
        spinlock_t              blk_ring_lock;

        wait_queue_head_t       wq;
        atomic_t                inflight;
        bool                    active;
        /* One thread per blkif ring. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;

        /* List of all 'pending_req' available */
        struct list_head        pending_free;
        /* And its spinlock. */
        spinlock_t              pending_free_lock;
        wait_queue_head_t       pending_free_wq;

        /* Tree to store persistent grants. */
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
        atomic_t                persistent_gnt_in_use;
        unsigned long           next_lru;

        /* Statistics. */
        unsigned long           st_print;
        unsigned long long      st_rd_req;
        unsigned long long      st_wr_req;
        unsigned long long      st_oo_req;
        unsigned long long      st_f_req;
        unsigned long long      st_ds_req;
        unsigned long long      st_rd_sect;
        unsigned long long      st_wr_sect;

        /* Used by the kworker that offloads work from the persistent purge. */
        struct list_head        persistent_purge_list;
        struct work_struct      persistent_purge_work;

        /* Buffer of free pages to map grant refs. */
        spinlock_t              free_pages_lock;
        int                     free_pages_num;
        struct list_head        free_pages;

        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
        struct xen_blkif        *blkif;
};

struct xen_blkif {
        /* Unique identifier for this interface. */
        domid_t                 domid;
        unsigned int            handle;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
        /* The VBD attached to this interface. */
        struct xen_vbd          vbd;
        /* Back pointer to the backend_info. */
        struct backend_info     *be;
        atomic_t                refcnt;
        /* for barrier (drain) requests */
        struct completion       drain_complete;
        atomic_t                drain;

        struct work_struct      free_work;
        unsigned int            nr_ring_pages;
        /* All rings for this device. */
        struct xen_blkif_ring   *rings;
        unsigned int            nr_rings;
};

struct seg_buf {
        unsigned long offset;
        unsigned int nsec;
};

struct grant_page {
        struct page             *page;
        struct persistent_gnt   *persistent_gnt;
        grant_handle_t          handle;
        grant_ref_t             gref;
};

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
        struct xen_blkif_ring   *ring;
        u64                     id;
        int                     nr_segs;
        atomic_t                pendcnt;
        unsigned short          operation;
        int                     status;
        struct list_head        free_list;
        struct grant_page       *segments[MAX_INDIRECT_SEGMENTS];
        /* Indirect descriptors */
        struct grant_page       *indirect_pages[MAX_INDIRECT_PAGES];
        struct seg_buf          seg[MAX_INDIRECT_SEGMENTS];
        struct bio              *biolist[MAX_INDIRECT_SEGMENTS];
        struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
        struct page                   *unmap_pages[MAX_INDIRECT_SEGMENTS];
        struct gntab_unmap_queue_data gnttab_unmap_data;
};
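/*
 * A sketch of the completion path described above (the real code is in
 * blkback.c and may differ in detail): each submitted bio holds one count
 * in pendcnt, and the bio completion handler does roughly
 *
 *      if (atomic_dec_and_test(&pending_req->pendcnt)) {
 *              unmap the mapped grants;
 *              make_response(ring, pending_req->id, ...);
 *      }
 *
 * so the frontend only sees a response for 'id' once the last bio is done.
 */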


#define vbd_sz(_v)      ((_v)->bdev->bd_part ? \
                         (_v)->bdev->bd_part->nr_sects : \
                          get_capacity((_v)->bdev->bd_disk))

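/*
 * vbd_sz() returns the size of the VBD in 512-byte sectors, taken from the
 * partition when one is attached, otherwise from the whole disk.  Typical
 * (illustrative) use:
 *
 *      sector_t sectors = vbd_sz(&blkif->vbd);
 *
 * Multiply by 512 for a size in bytes.
 */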
#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)                               \
        do {                                            \
                if (atomic_dec_and_test(&(_b)->refcnt)) \
                        schedule_work(&(_b)->free_work);\
        } while (0)
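/*
 * Reference counting sketch (illustrative only): whoever needs to keep the
 * blkif alive across a sleep or an asynchronous operation pairs these:
 *
 *      xen_blkif_get(blkif);
 *      ... hand work to another context ...
 *      xen_blkif_put(blkif);
 *
 * The final put schedules free_work instead of freeing in place, so the
 * release can happen later in process context.
 */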

struct phys_req {
        unsigned short          dev;
        blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
};
int xen_blkif_interface_init(void);

int xen_blkif_xenbus_init(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif_ring *ring);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
                      struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                                        struct blkif_x86_32_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;

        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                                        struct blkif_x86_64_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;

        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}
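/*
 * These two helpers are the compat half of request handling.  A sketch of
 * the dispatch in blkback.c (not verbatim) copies each request off the
 * shared ring according to the negotiated ABI, roughly:
 *
 *      switch (blkif->blk_protocol) {
 *      case BLKIF_PROTOCOL_NATIVE:
 *              memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
 *              break;
 *      case BLKIF_PROTOCOL_X86_32:
 *              blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
 *              break;
 *      case BLKIF_PROTOCOL_X86_64:
 *              blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
 *              break;
 *      }
 */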

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */