linux/include/linux/dst.h
<<
>>
Prefs
   1/*
   2 * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
   3 * All rights reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation; either version 2 of the License, or
   8 * (at your option) any later version.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 */
  15
  16#ifndef __DST_H
  17#define __DST_H
  18
  19#include <linux/types.h>
  20#include <linux/connector.h>
  21
  22#define DST_NAMELEN             32
  23#define DST_NAME                "dst"
  24
  25enum {
  26        /* Remove node with given id from storage */
  27        DST_DEL_NODE    = 0,
  28        /* Add remote node with given id to the storage */
  29        DST_ADD_REMOTE,
  30        /* Add local node with given id to the storage to be exported and used by remote peers */
  31        DST_ADD_EXPORT,
  32        /* Crypto initialization command (hash/cipher used to protect the connection) */
  33        DST_CRYPTO,
  34        /* Security attributes for given connection (permissions for example) */
  35        DST_SECURITY,
  36        /* Register given node in the block layer subsystem */
  37        DST_START,
  38        DST_CMD_MAX
  39};
  40
  41struct dst_ctl
  42{
  43        /* Storage name */
  44        char                    name[DST_NAMELEN];
  45        /* Command flags */
  46        __u32                   flags;
  47        /* Command itself (see above) */
  48        __u32                   cmd;
  49        /* Maximum number of pages per single request in this device */
  50        __u32                   max_pages;
  51        /* Stale/error transaction scanning timeout in milliseconds */
  52        __u32                   trans_scan_timeout;
  53        /* Maximum number of retry sends before completing transaction as broken */
  54        __u32                   trans_max_retries;
  55        /* Storage size */
  56        __u64                   size;
  57};
  58
  59/* Reply command carries completion status */
  60struct dst_ctl_ack
  61{
  62        struct cn_msg           msg;
  63        int                     error;
  64        int                     unused[3];
  65};
  66
  67/*
  68 * Unfortunaltely socket address structure is not exported to userspace
  69 * and is redefined there.
  70 */
  71#define SADDR_MAX_DATA  128
  72
  73struct saddr {
  74        /* address family, AF_xxx       */
  75        unsigned short          sa_family;
  76        /* 14 bytes of protocol address */
  77        char                    sa_data[SADDR_MAX_DATA];
  78        /* Number of bytes used in sa_data */
  79        unsigned short          sa_data_len;
  80};
  81
  82/* Address structure */
  83struct dst_network_ctl
  84{
  85        /* Socket type: datagram, stream...*/
  86        unsigned int            type;
  87        /* Let me guess, is it a Jupiter diameter? */
  88        unsigned int            proto;
  89        /* Peer's address */
  90        struct saddr            addr;
  91};
  92
  93struct dst_crypto_ctl
  94{
  95        /* Cipher and hash names */
  96        char                    cipher_algo[DST_NAMELEN];
  97        char                    hash_algo[DST_NAMELEN];
  98
  99        /* Key sizes. Can be zero for digest for example */
 100        unsigned int            cipher_keysize, hash_keysize;
 101        /* Alignment. Calculated by the DST itself. */
 102        unsigned int            crypto_attached_size;
 103        /* Number of threads to perform crypto operations */
 104        int                     thread_num;
 105};
 106
 107/* Export security attributes have this bits checked in when client connects */
 108#define DST_PERM_READ           (1<<0)
 109#define DST_PERM_WRITE          (1<<1)
 110
 111/*
 112 * Right now it is simple model, where each remote address
 113 * is assigned to set of permissions it is allowed to perform.
 114 * In real world block device does not know anything but
 115 * reading and writing, so it should be more than enough.
 116 */
 117struct dst_secure_user
 118{
 119        unsigned int            permissions;
 120        struct saddr            addr;
 121};
 122
 123/*
 124 * Export control command: device to export and network address to accept
 125 * clients to work with given device
 126 */
 127struct dst_export_ctl
 128{
 129        char                    device[DST_NAMELEN];
 130        struct dst_network_ctl  ctl;
 131};
 132
 133enum {
 134        DST_CFG = 1,            /* Request remote configuration */
 135        DST_IO,                 /* IO command */
 136        DST_IO_RESPONSE,        /* IO response */
 137        DST_PING,               /* Keepalive message */
 138        DST_NCMD_MAX,
 139};
 140
 141struct dst_cmd
 142{
 143        /* Network command itself, see above */
 144        __u32                   cmd;
 145        /*
 146         * Size of the attached data
 147         * (in most cases, for READ command it means how many bytes were requested)
 148         */
 149        __u32                   size;
 150        /* Crypto size: number of attached bytes with digest/hmac */
 151        __u32                   csize;
 152        /* Here we can carry secret data */
 153        __u32                   reserved;
 154        /* Read/write bits, see how they are encoded in bio structure */
 155        __u64                   rw;
 156        /* BIO flags */
 157        __u64                   flags;
 158        /* Unique command id (like transaction ID) */
 159        __u64                   id;
 160        /* Sector to start IO from */
 161        __u64                   sector;
 162        /* Hash data is placed after this header */
 163        __u8                    hash[0];
 164};
 165
 166/*
 167 * Convert command to/from network byte order.
 168 * We do not use hton*() functions, since there is
 169 * no 64-bit implementation.
 170 */
 171static inline void dst_convert_cmd(struct dst_cmd *c)
 172{
 173        c->cmd = __cpu_to_be32(c->cmd);
 174        c->csize = __cpu_to_be32(c->csize);
 175        c->size = __cpu_to_be32(c->size);
 176        c->sector = __cpu_to_be64(c->sector);
 177        c->id = __cpu_to_be64(c->id);
 178        c->flags = __cpu_to_be64(c->flags);
 179        c->rw = __cpu_to_be64(c->rw);
 180}
 181
/* Transaction id type, used by dst_trans.gen and dst_trans_search() */
typedef __u64 dst_gen_t;
 184
 185#ifdef __KERNEL__
 186
 187#include <linux/blkdev.h>
 188#include <linux/bio.h>
 189#include <linux/device.h>
 190#include <linux/mempool.h>
 191#include <linux/net.h>
 192#include <linux/poll.h>
 193#include <linux/rbtree.h>
 194
 195#ifdef CONFIG_DST_DEBUG
 196#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
 197#else
 198static inline void __attribute__ ((format (printf, 1, 2)))
 199        dprintk(const char *fmt, ...) {}
 200#endif
 201
 202struct dst_node;
 203
 204struct dst_trans
 205{
 206        /* DST node we are working with */
 207        struct dst_node         *n;
 208
 209        /* Entry inside transaction tree */
 210        struct rb_node          trans_entry;
 211
 212        /* Merlin kills this transaction when this memory cell equals zero */
 213        atomic_t                refcnt;
 214
 215        /* How this transaction should be processed by crypto engine */
 216        short                   enc;
 217        /* How many times this transaction was resent */
 218        short                   retries;
 219        /* Completion status */
 220        int                     error;
 221
 222        /* When did we send it to the remote peer */
 223        long                    send_time;
 224
 225        /* My name is...
 226         * Well, computers does not speak, they have unique id instead */
 227        dst_gen_t               gen;
 228
 229        /* Block IO we are working with */
 230        struct bio              *bio;
 231
 232        /* Network command for above block IO request */
 233        struct dst_cmd          cmd;
 234};
 235
 236struct dst_crypto_engine
 237{
 238        /* What should we do with all block requests */
 239        struct crypto_hash      *hash;
 240        struct crypto_ablkcipher        *cipher;
 241
 242        /* Pool of pages used to encrypt data into before sending */
 243        int                     page_num;
 244        struct page             **pages;
 245
 246        /* What to do with current request */
 247        int                     enc;
 248        /* Who we are and where do we go */
 249        struct scatterlist      *src, *dst;
 250
 251        /* Maximum timeout waiting for encryption to be completed */
 252        long                    timeout;
 253        /* IV is a 64-bit sequential counter */
 254        u64                     iv;
 255
 256        /* Secret data */
 257        void                    *private;
 258
 259        /* Cached temporary data lives here */
 260        int                     size;
 261        void                    *data;
 262};
 263
 264struct dst_state
 265{
 266        /* The main state protection */
 267        struct mutex            state_lock;
 268
 269        /* Polling machinery for sockets */
 270        wait_queue_t            wait;
 271        wait_queue_head_t       *whead;
 272        /* Most of events are being waited here */
 273        wait_queue_head_t       thread_wait;
 274
 275        /* Who owns this? */
 276        struct dst_node         *node;
 277
 278        /* Network address for this state */
 279        struct dst_network_ctl  ctl;
 280
 281        /* Permissions to work with: read-only or rw connection */
 282        u32                     permissions;
 283
 284        /* Called when we need to clean private data */
 285        void                    (* cleanup)(struct dst_state *st);
 286
 287        /* Used by the server: BIO completion queues BIOs here */
 288        struct list_head        request_list;
 289        spinlock_t              request_lock;
 290
 291        /* Guess what? No, it is not number of planets */
 292        atomic_t                refcnt;
 293
 294        /* This flags is set when connection should be dropped */
 295        int                     need_exit;
 296
 297        /*
 298         * Socket to work with. Second pointer is used for
 299         * lockless check if socket was changed before performing
 300         * next action (like working with cached polling result)
 301         */
 302        struct socket           *socket, *read_socket;
 303
 304        /* Cached preallocated data */
 305        void                    *data;
 306        unsigned int            size;
 307
 308        /* Currently processed command */
 309        struct dst_cmd          cmd;
 310};
 311
 312struct dst_info
 313{
 314        /* Device size */
 315        u64                     size;
 316
 317        /* Local device name for export devices */
 318        char                    local[DST_NAMELEN];
 319
 320        /* Network setup */
 321        struct dst_network_ctl  net;
 322
 323        /* Sysfs bits use this */
 324        struct device           device;
 325};
 326
 327struct dst_node
 328{
 329        struct list_head        node_entry;
 330
 331        /* Hi, my name is stored here */
 332        char                    name[DST_NAMELEN];
 333        /* My cache name is stored here */
 334        char                    cache_name[DST_NAMELEN];
 335
 336        /* Block device attached to given node.
 337         * Only valid for exporting nodes */
 338        struct block_device     *bdev;
 339        /* Network state machine for given peer */
 340        struct dst_state        *state;
 341
 342        /* Block IO machinery */
 343        struct request_queue    *queue;
 344        struct gendisk          *disk;
 345
 346        /* Number of threads in processing pool */
 347        int                     thread_num;
 348        /* Maximum number of pages in single IO */
 349        int                     max_pages;
 350
 351        /* I'm that big in bytes */
 352        loff_t                  size;
 353
 354        /* Exported to userspace node information */
 355        struct dst_info         *info;
 356
 357        /*
 358         * Security attribute list.
 359         * Used only by exporting node currently.
 360         */
 361        struct list_head        security_list;
 362        struct mutex            security_lock;
 363
 364        /*
 365         * When this unerflows below zero, university collapses.
 366         * But this will not happen, since node will be freed,
 367         * when reference counter reaches zero.
 368         */
 369        atomic_t                refcnt;
 370
 371        /* How precisely should I be started? */
 372        int                     (*start)(struct dst_node *);
 373
 374        /* Crypto capabilities */
 375        struct dst_crypto_ctl   crypto;
 376        u8                      *hash_key;
 377        u8                      *cipher_key;
 378
 379        /* Pool of processing thread */
 380        struct thread_pool      *pool;
 381
 382        /* Transaction IDs live here */
 383        atomic_long_t           gen;
 384
 385        /*
 386         * How frequently and how many times transaction
 387         * tree should be scanned to drop stale objects.
 388         */
 389        long                    trans_scan_timeout;
 390        int                     trans_max_retries;
 391
 392        /* Small gnomes live here */
 393        struct rb_root          trans_root;
 394        struct mutex            trans_lock;
 395
 396        /*
 397         * Transaction cache/memory pool.
 398         * It is big enough to contain not only transaction
 399         * itself, but additional crypto data (digest/hmac).
 400         */
 401        struct kmem_cache       *trans_cache;
 402        mempool_t               *trans_pool;
 403
 404        /* This entity scans transaction tree */
 405        struct delayed_work     trans_work;
 406
 407        wait_queue_head_t       wait;
 408};
 409
 410/* Kernel representation of the security attribute */
 411struct dst_secure
 412{
 413        struct list_head        sec_entry;
 414        struct dst_secure_user  sec;
 415};
 416
 417int dst_process_bio(struct dst_node *n, struct bio *bio);
 418
 419int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
 420int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
 421
 422static inline struct dst_state *dst_state_get(struct dst_state *st)
 423{
 424        BUG_ON(atomic_read(&st->refcnt) == 0);
 425        atomic_inc(&st->refcnt);
 426        return st;
 427}
 428
 429void dst_state_put(struct dst_state *st);
 430
 431struct dst_state *dst_state_alloc(struct dst_node *n);
 432int dst_state_socket_create(struct dst_state *st);
 433void dst_state_socket_release(struct dst_state *st);
 434
 435void dst_state_exit_connected(struct dst_state *st);
 436
 437int dst_state_schedule_receiver(struct dst_state *st);
 438
 439void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
 440
 441static inline void dst_state_lock(struct dst_state *st)
 442{
 443        mutex_lock(&st->state_lock);
 444}
 445
 446static inline void dst_state_unlock(struct dst_state *st)
 447{
 448        mutex_unlock(&st->state_lock);
 449}
 450
 451void dst_poll_exit(struct dst_state *st);
 452int dst_poll_init(struct dst_state *st);
 453
 454static inline unsigned int dst_state_poll(struct dst_state *st)
 455{
 456        unsigned int revents = POLLHUP | POLLERR;
 457
 458        dst_state_lock(st);
 459        if (st->socket)
 460                revents = st->socket->ops->poll(NULL, st->socket, NULL);
 461        dst_state_unlock(st);
 462
 463        return revents;
 464}
 465
 466static inline int dst_thread_setup(void *private, void *data)
 467{
 468        return 0;
 469}
 470
 471void dst_node_put(struct dst_node *n);
 472
 473static inline struct dst_node *dst_node_get(struct dst_node *n)
 474{
 475        atomic_inc(&n->refcnt);
 476        return n;
 477}
 478
 479int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
 480int dst_recv_cdata(struct dst_state *st, void *cdata);
 481int dst_data_send_header(struct socket *sock,
 482                void *data, unsigned int size, int more);
 483
 484int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
 485
 486int dst_process_io(struct dst_state *st);
 487int dst_export_crypto(struct dst_node *n, struct bio *bio);
 488int dst_export_send_bio(struct bio *bio);
 489int dst_start_export(struct dst_node *n);
 490
 491int __init dst_export_init(void);
 492void dst_export_exit(void);
 493
 494/* Private structure for export block IO requests */
 495struct dst_export_priv
 496{
 497        struct list_head                request_entry;
 498        struct dst_state                *state;
 499        struct bio                      *bio;
 500        struct dst_cmd                  cmd;
 501};
 502
 503static inline void dst_trans_get(struct dst_trans *t)
 504{
 505        atomic_inc(&t->refcnt);
 506}
 507
 508struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
 509int dst_trans_remove(struct dst_trans *t);
 510int dst_trans_remove_nolock(struct dst_trans *t);
 511void dst_trans_put(struct dst_trans *t);
 512
 513/*
 514 * Convert bio into network command.
 515 */
 516static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
 517                u32 command, u64 id)
 518{
 519        cmd->cmd = command;
 520        cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
 521        cmd->rw = bio->bi_rw;
 522        cmd->size = bio->bi_size;
 523        cmd->csize = 0;
 524        cmd->id = id;
 525        cmd->sector = bio->bi_sector;
 526};
 527
 528int dst_trans_send(struct dst_trans *t);
 529int dst_trans_crypto(struct dst_trans *t);
 530
 531int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
 532void dst_node_crypto_exit(struct dst_node *n);
 533
 534static inline int dst_need_crypto(struct dst_node *n)
 535{
 536        struct dst_crypto_ctl *c = &n->crypto;
 537        /*
 538         * Logical OR is appropriate here, but boolean one produces
 539         * more optimal code, so it is used instead.
 540         */
 541        return (c->hash_algo[0] | c->cipher_algo[0]);
 542}
 543
 544int dst_node_trans_init(struct dst_node *n, unsigned int size);
 545void dst_node_trans_exit(struct dst_node *n);
 546
 547/*
 548 * Pool of threads.
 549 * Ready list contains threads currently free to be used,
 550 * active one contains threads with some work scheduled for them.
 551 * Caller can wait in given queue when thread is ready.
 552 */
 553struct thread_pool
 554{
 555        int                     thread_num;
 556        struct mutex            thread_lock;
 557        struct list_head        ready_list, active_list;
 558
 559        wait_queue_head_t       wait;
 560};
 561
 562void thread_pool_del_worker(struct thread_pool *p);
 563void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
 564int thread_pool_add_worker(struct thread_pool *p,
 565                char *name,
 566                unsigned int id,
 567                void *(* init)(void *data),
 568                void (* cleanup)(void *data),
 569                void *data);
 570
 571void thread_pool_destroy(struct thread_pool *p);
 572struct thread_pool *thread_pool_create(int num, char *name,
 573                void *(* init)(void *data),
 574                void (* cleanup)(void *data),
 575                void *data);
 576
 577int thread_pool_schedule(struct thread_pool *p,
 578                int (* setup)(void *stored_private, void *setup_data),
 579                int (* action)(void *stored_private, void *setup_data),
 580                void *setup_data, long timeout);
 581int thread_pool_schedule_private(struct thread_pool *p,
 582                int (* setup)(void *private, void *data),
 583                int (* action)(void *private, void *data),
 584                void *data, long timeout, void *id);
 585
 586#endif /* __KERNEL__ */
 587#endif /* __DST_H */
 588