// SPDX-License-Identifier: GPL-2.0-only
/*
 * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
 * Shaohua Li <shli@fb.com>
 */
6#include <linux/module.h>
7
8#include <linux/moduleparam.h>
9#include <linux/sched.h>
10#include <linux/fs.h>
11#include <linux/init.h>
12#include "null_blk.h"
13
14#define FREE_BATCH 16
15
16#define TICKS_PER_SEC 50ULL
17#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
18
19#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
20static DECLARE_FAULT_ATTR(null_timeout_attr);
21static DECLARE_FAULT_ATTR(null_requeue_attr);
22static DECLARE_FAULT_ATTR(null_init_hctx_attr);
23#endif
24
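/*
 * Number of bytes that a throttled device may transfer per bandwidth-timer
 * tick for a given MB/s limit.
 */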
25static inline u64 mb_per_tick(int mbps)
26{
27 return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
28}
29
/*
 * Status flags for nullb_device.
 *
 * CONFIGURED:	Device has been configured and turned on. Cannot reconfigure.
 * UP:		Device is currently on and visible in userspace.
 * THROTTLED:	Device is being throttled.
 * CACHE:	Device is using a write-back cache.
 */
38enum nullb_device_flags {
39 NULLB_DEV_FL_CONFIGURED = 0,
40 NULLB_DEV_FL_UP = 1,
41 NULLB_DEV_FL_THROTTLED = 2,
42 NULLB_DEV_FL_CACHE = 3,
43};
44
45#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
46
/*
 * nullb_page is a page in memory for nullb devices.
 *
 * @page:	The page holding the data.
 * @bitmap:	The bitmap represents which sector in the page has data.
 *		Each bit represents one block size. For example, sector 8
 *		will use the 7th bit.
 * The highest 2 bits of the bitmap are for special purpose. LOCK means the
 * cache page is being flushed to storage. FREE means the cache page is freed
 * and should be skipped from flushing to storage. Please see
 * null_make_cache_space().
 */
58struct nullb_page {
59 struct page *page;
60 DECLARE_BITMAP(bitmap, MAP_SZ);
61};
62#define NULLB_PAGE_LOCK (MAP_SZ - 1)
63#define NULLB_PAGE_FREE (MAP_SZ - 2)
64
65static LIST_HEAD(nullb_list);
66static struct mutex lock;
67static int null_major;
68static DEFINE_IDA(nullb_indexes);
69static struct blk_mq_tag_set tag_set;
70
71enum {
72 NULL_IRQ_NONE = 0,
73 NULL_IRQ_SOFTIRQ = 1,
74 NULL_IRQ_TIMER = 2,
75};
76
77enum {
78 NULL_Q_BIO = 0,
79 NULL_Q_RQ = 1,
80 NULL_Q_MQ = 2,
81};
82
83static bool g_virt_boundary = false;
84module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
85MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
86
87static int g_no_sched;
88module_param_named(no_sched, g_no_sched, int, 0444);
89MODULE_PARM_DESC(no_sched, "No io scheduler");
90
91static int g_submit_queues = 1;
92module_param_named(submit_queues, g_submit_queues, int, 0444);
93MODULE_PARM_DESC(submit_queues, "Number of submission queues");
94
95static int g_poll_queues = 1;
96module_param_named(poll_queues, g_poll_queues, int, 0444);
97MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues");
98
99static int g_home_node = NUMA_NO_NODE;
100module_param_named(home_node, g_home_node, int, 0444);
101MODULE_PARM_DESC(home_node, "Home node for the device");
102
103#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
/*
 * For more details about fault injection, please refer to
 * Documentation/fault-injection/fault-injection.rst.
 */
108static char g_timeout_str[80];
109module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
110MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
111
112static char g_requeue_str[80];
113module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
114MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
115
116static char g_init_hctx_str[80];
117module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
118MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
119#endif
120
121static int g_queue_mode = NULL_Q_MQ;
122
123static int null_param_store_val(const char *str, int *val, int min, int max)
124{
125 int ret, new_val;
126
127 ret = kstrtoint(str, 10, &new_val);
128 if (ret)
129 return -EINVAL;
130
131 if (new_val < min || new_val > max)
132 return -EINVAL;
133
134 *val = new_val;
135 return 0;
136}
137
138static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
139{
140 return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
141}
142
143static const struct kernel_param_ops null_queue_mode_param_ops = {
144 .set = null_set_queue_mode,
145 .get = param_get_int,
146};
147
148device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
149MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
150
151static int g_gb = 250;
152module_param_named(gb, g_gb, int, 0444);
153MODULE_PARM_DESC(gb, "Size in GB");
154
155static int g_bs = 512;
156module_param_named(bs, g_bs, int, 0444);
157MODULE_PARM_DESC(bs, "Block size (in bytes)");
158
159static int g_max_sectors;
160module_param_named(max_sectors, g_max_sectors, int, 0444);
161MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
162
163static unsigned int nr_devices = 1;
164module_param(nr_devices, uint, 0444);
165MODULE_PARM_DESC(nr_devices, "Number of devices to register");
166
167static bool g_blocking;
168module_param_named(blocking, g_blocking, bool, 0444);
169MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
170
171static bool shared_tags;
172module_param(shared_tags, bool, 0444);
173MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
174
175static bool g_shared_tag_bitmap;
176module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
177MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
178
179static int g_irqmode = NULL_IRQ_SOFTIRQ;
180
181static int null_set_irqmode(const char *str, const struct kernel_param *kp)
182{
183 return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
184 NULL_IRQ_TIMER);
185}
186
187static const struct kernel_param_ops null_irqmode_param_ops = {
188 .set = null_set_irqmode,
189 .get = param_get_int,
190};
191
192device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
193MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
194
195static unsigned long g_completion_nsec = 10000;
196module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
197MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
198
199static int g_hw_queue_depth = 64;
200module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
201MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
202
203static bool g_use_per_node_hctx;
204module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
205MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
206
207static bool g_zoned;
208module_param_named(zoned, g_zoned, bool, S_IRUGO);
209MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
210
211static unsigned long g_zone_size = 256;
212module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
213MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
214
215static unsigned long g_zone_capacity;
216module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
217MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
218
219static unsigned int g_zone_nr_conv;
220module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
221MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
222
223static unsigned int g_zone_max_open;
224module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
225MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
226
227static unsigned int g_zone_max_active;
228module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
229MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
230
231static struct nullb_device *null_alloc_dev(void);
232static void null_free_dev(struct nullb_device *dev);
233static void null_del_dev(struct nullb *nullb);
234static int null_add_dev(struct nullb_device *dev);
235static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
236
237static inline struct nullb_device *to_nullb_device(struct config_item *item)
238{
239 return item ? container_of(item, struct nullb_device, item) : NULL;
240}
241
242static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
243{
244 return snprintf(page, PAGE_SIZE, "%u\n", val);
245}
246
247static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
248 char *page)
249{
250 return snprintf(page, PAGE_SIZE, "%lu\n", val);
251}
252
253static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
254{
255 return snprintf(page, PAGE_SIZE, "%u\n", val);
256}
257
258static ssize_t nullb_device_uint_attr_store(unsigned int *val,
259 const char *page, size_t count)
260{
261 unsigned int tmp;
262 int result;
263
264 result = kstrtouint(page, 0, &tmp);
265 if (result < 0)
266 return result;
267
268 *val = tmp;
269 return count;
270}
271
272static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
273 const char *page, size_t count)
274{
275 int result;
276 unsigned long tmp;
277
278 result = kstrtoul(page, 0, &tmp);
279 if (result < 0)
280 return result;
281
282 *val = tmp;
283 return count;
284}
285
286static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
287 size_t count)
288{
289 bool tmp;
290 int result;
291
292 result = kstrtobool(page, &tmp);
293 if (result < 0)
294 return result;
295
296 *val = tmp;
297 return count;
298}
299
/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
301#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
302static ssize_t \
303nullb_device_##NAME##_show(struct config_item *item, char *page) \
304{ \
305 return nullb_device_##TYPE##_attr_show( \
306 to_nullb_device(item)->NAME, page); \
307} \
308static ssize_t \
309nullb_device_##NAME##_store(struct config_item *item, const char *page, \
310 size_t count) \
311{ \
312 int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
313 struct nullb_device *dev = to_nullb_device(item); \
314 TYPE new_value = 0; \
315 int ret; \
316 \
317 ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
318 if (ret < 0) \
319 return ret; \
320 if (apply_fn) \
321 ret = apply_fn(dev, new_value); \
322 else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
323 ret = -EBUSY; \
324 if (ret < 0) \
325 return ret; \
326 dev->NAME = new_value; \
327 return count; \
328} \
329CONFIGFS_ATTR(nullb_device_, NAME);
330
331static int nullb_update_nr_hw_queues(struct nullb_device *dev,
332 unsigned int submit_queues,
333 unsigned int poll_queues)
334
335{
336 struct blk_mq_tag_set *set;
337 int ret, nr_hw_queues;
338
339 if (!dev->nullb)
340 return 0;
341
	/*
	 * Make sure at least one submit queue exists.
	 */
345 if (!submit_queues)
346 return -EINVAL;
347
	/*
	 * Make sure that null_init_hctx() does not access nullb->queues[]
	 * past the end of that array.
	 */
352 if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues)
353 return -EINVAL;
354
	/*
	 * Keep the previous and new queue numbers in nullb_device for
	 * reference in the callback function null_map_queues().
	 */
359 dev->prev_submit_queues = dev->submit_queues;
360 dev->prev_poll_queues = dev->poll_queues;
361 dev->submit_queues = submit_queues;
362 dev->poll_queues = poll_queues;
363
364 set = dev->nullb->tag_set;
365 nr_hw_queues = submit_queues + poll_queues;
366 blk_mq_update_nr_hw_queues(set, nr_hw_queues);
367 ret = set->nr_hw_queues == nr_hw_queues ? 0 : -ENOMEM;
368
369 if (ret) {
		/* on error, revert the queue numbers */
371 dev->submit_queues = dev->prev_submit_queues;
372 dev->poll_queues = dev->prev_poll_queues;
373 }
374
375 return ret;
376}
377
378static int nullb_apply_submit_queues(struct nullb_device *dev,
379 unsigned int submit_queues)
380{
381 return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
382}
383
384static int nullb_apply_poll_queues(struct nullb_device *dev,
385 unsigned int poll_queues)
386{
387 return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
388}
389
390NULLB_DEVICE_ATTR(size, ulong, NULL);
391NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
392NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
393NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues);
394NULLB_DEVICE_ATTR(home_node, uint, NULL);
395NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
396NULLB_DEVICE_ATTR(blocksize, uint, NULL);
397NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
398NULLB_DEVICE_ATTR(irqmode, uint, NULL);
399NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
400NULLB_DEVICE_ATTR(index, uint, NULL);
401NULLB_DEVICE_ATTR(blocking, bool, NULL);
402NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
403NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
404NULLB_DEVICE_ATTR(discard, bool, NULL);
405NULLB_DEVICE_ATTR(mbps, uint, NULL);
406NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
407NULLB_DEVICE_ATTR(zoned, bool, NULL);
408NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
409NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
410NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
411NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
412NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
413NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
414
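/*
 * Writing 1 to the "power" attribute of a configfs node creates and starts
 * the backing disk; writing 0 tears it down again. For example (node name is
 * illustrative, assuming configfs is mounted at /sys/kernel/config):
 *
 *   mkdir /sys/kernel/config/nullb/nullb1
 *   echo 1 > /sys/kernel/config/nullb/nullb1/power
 *   echo 0 > /sys/kernel/config/nullb/nullb1/power
 */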
415static ssize_t nullb_device_power_show(struct config_item *item, char *page)
416{
417 return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
418}
419
420static ssize_t nullb_device_power_store(struct config_item *item,
421 const char *page, size_t count)
422{
423 struct nullb_device *dev = to_nullb_device(item);
424 bool newp = false;
425 ssize_t ret;
426
427 ret = nullb_device_bool_attr_store(&newp, page, count);
428 if (ret < 0)
429 return ret;
430
431 if (!dev->power && newp) {
432 if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
433 return count;
434 ret = null_add_dev(dev);
435 if (ret) {
436 clear_bit(NULLB_DEV_FL_UP, &dev->flags);
437 return ret;
438 }
439
440 set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
441 dev->power = newp;
442 } else if (dev->power && !newp) {
443 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
444 mutex_lock(&lock);
445 dev->power = newp;
446 null_del_dev(dev->nullb);
447 mutex_unlock(&lock);
448 }
449 clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
450 }
451
452 return count;
453}
454
455CONFIGFS_ATTR(nullb_device_, power);
456
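/*
 * The "badblocks" attribute accepts ranges of the form "+start-end" to mark
 * sectors bad and "-start-end" to clear them again, e.g. "echo +0-7".
 */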
457static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
458{
459 struct nullb_device *t_dev = to_nullb_device(item);
460
461 return badblocks_show(&t_dev->badblocks, page, 0);
462}
463
464static ssize_t nullb_device_badblocks_store(struct config_item *item,
465 const char *page, size_t count)
466{
467 struct nullb_device *t_dev = to_nullb_device(item);
468 char *orig, *buf, *tmp;
469 u64 start, end;
470 int ret;
471
472 orig = kstrndup(page, count, GFP_KERNEL);
473 if (!orig)
474 return -ENOMEM;
475
476 buf = strstrip(orig);
477
478 ret = -EINVAL;
479 if (buf[0] != '+' && buf[0] != '-')
480 goto out;
481 tmp = strchr(&buf[1], '-');
482 if (!tmp)
483 goto out;
484 *tmp = '\0';
485 ret = kstrtoull(buf + 1, 0, &start);
486 if (ret)
487 goto out;
488 ret = kstrtoull(tmp + 1, 0, &end);
489 if (ret)
490 goto out;
491 ret = -EINVAL;
492 if (start > end)
493 goto out;
494
495 cmpxchg(&t_dev->badblocks.shift, -1, 0);
496 if (buf[0] == '+')
497 ret = badblocks_set(&t_dev->badblocks, start,
498 end - start + 1, 1);
499 else
500 ret = badblocks_clear(&t_dev->badblocks, start,
501 end - start + 1);
502 if (ret == 0)
503 ret = count;
504out:
505 kfree(orig);
506 return ret;
507}
508CONFIGFS_ATTR(nullb_device_, badblocks);
509
510static struct configfs_attribute *nullb_device_attrs[] = {
511 &nullb_device_attr_size,
512 &nullb_device_attr_completion_nsec,
513 &nullb_device_attr_submit_queues,
514 &nullb_device_attr_poll_queues,
515 &nullb_device_attr_home_node,
516 &nullb_device_attr_queue_mode,
517 &nullb_device_attr_blocksize,
518 &nullb_device_attr_max_sectors,
519 &nullb_device_attr_irqmode,
520 &nullb_device_attr_hw_queue_depth,
521 &nullb_device_attr_index,
522 &nullb_device_attr_blocking,
523 &nullb_device_attr_use_per_node_hctx,
524 &nullb_device_attr_power,
525 &nullb_device_attr_memory_backed,
526 &nullb_device_attr_discard,
527 &nullb_device_attr_mbps,
528 &nullb_device_attr_cache_size,
529 &nullb_device_attr_badblocks,
530 &nullb_device_attr_zoned,
531 &nullb_device_attr_zone_size,
532 &nullb_device_attr_zone_capacity,
533 &nullb_device_attr_zone_nr_conv,
534 &nullb_device_attr_zone_max_open,
535 &nullb_device_attr_zone_max_active,
536 &nullb_device_attr_virt_boundary,
537 NULL,
538};
539
540static void nullb_device_release(struct config_item *item)
541{
542 struct nullb_device *dev = to_nullb_device(item);
543
544 null_free_device_storage(dev, false);
545 null_free_dev(dev);
546}
547
548static struct configfs_item_operations nullb_device_ops = {
549 .release = nullb_device_release,
550};
551
552static const struct config_item_type nullb_device_type = {
553 .ct_item_ops = &nullb_device_ops,
554 .ct_attrs = nullb_device_attrs,
555 .ct_owner = THIS_MODULE,
556};
557
558static struct
559config_item *nullb_group_make_item(struct config_group *group, const char *name)
560{
561 struct nullb_device *dev;
562
563 dev = null_alloc_dev();
564 if (!dev)
565 return ERR_PTR(-ENOMEM);
566
567 config_item_init_type_name(&dev->item, name, &nullb_device_type);
568
569 return &dev->item;
570}
571
572static void
573nullb_group_drop_item(struct config_group *group, struct config_item *item)
574{
575 struct nullb_device *dev = to_nullb_device(item);
576
577 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
578 mutex_lock(&lock);
579 dev->power = false;
580 null_del_dev(dev->nullb);
581 mutex_unlock(&lock);
582 }
583
584 config_item_put(item);
585}
586
587static ssize_t memb_group_features_show(struct config_item *item, char *page)
588{
589 return snprintf(page, PAGE_SIZE,
590 "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
591}
592
593CONFIGFS_ATTR_RO(memb_group_, features);
594
595static struct configfs_attribute *nullb_group_attrs[] = {
596 &memb_group_attr_features,
597 NULL,
598};
599
600static struct configfs_group_operations nullb_group_ops = {
601 .make_item = nullb_group_make_item,
602 .drop_item = nullb_group_drop_item,
603};
604
605static const struct config_item_type nullb_group_type = {
606 .ct_group_ops = &nullb_group_ops,
607 .ct_attrs = nullb_group_attrs,
608 .ct_owner = THIS_MODULE,
609};
610
611static struct configfs_subsystem nullb_subsys = {
612 .su_group = {
613 .cg_item = {
614 .ci_namebuf = "nullb",
615 .ci_type = &nullb_group_type,
616 },
617 },
618};
619
620static inline int null_cache_active(struct nullb *nullb)
621{
622 return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
623}
624
625static struct nullb_device *null_alloc_dev(void)
626{
627 struct nullb_device *dev;
628
629 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
630 if (!dev)
631 return NULL;
632 INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
633 INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
634 if (badblocks_init(&dev->badblocks, 0)) {
635 kfree(dev);
636 return NULL;
637 }
638
639 dev->size = g_gb * 1024;
640 dev->completion_nsec = g_completion_nsec;
641 dev->submit_queues = g_submit_queues;
642 dev->prev_submit_queues = g_submit_queues;
643 dev->poll_queues = g_poll_queues;
644 dev->prev_poll_queues = g_poll_queues;
645 dev->home_node = g_home_node;
646 dev->queue_mode = g_queue_mode;
647 dev->blocksize = g_bs;
648 dev->max_sectors = g_max_sectors;
649 dev->irqmode = g_irqmode;
650 dev->hw_queue_depth = g_hw_queue_depth;
651 dev->blocking = g_blocking;
652 dev->use_per_node_hctx = g_use_per_node_hctx;
653 dev->zoned = g_zoned;
654 dev->zone_size = g_zone_size;
655 dev->zone_capacity = g_zone_capacity;
656 dev->zone_nr_conv = g_zone_nr_conv;
657 dev->zone_max_open = g_zone_max_open;
658 dev->zone_max_active = g_zone_max_active;
659 dev->virt_boundary = g_virt_boundary;
660 return dev;
661}
662
663static void null_free_dev(struct nullb_device *dev)
664{
665 if (!dev)
666 return;
667
668 null_free_zoned_dev(dev);
669 badblocks_exit(&dev->badblocks);
670 kfree(dev);
671}
672
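/*
 * In bio queue mode the driver manages its own command tags: get_tag()
 * claims the first free bit in the per-queue bitmap and put_tag() releases
 * it again, waking any submitter sleeping in alloc_cmd().
 */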
673static void put_tag(struct nullb_queue *nq, unsigned int tag)
674{
675 clear_bit_unlock(tag, nq->tag_map);
676
677 if (waitqueue_active(&nq->wait))
678 wake_up(&nq->wait);
679}
680
681static unsigned int get_tag(struct nullb_queue *nq)
682{
683 unsigned int tag;
684
685 do {
686 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
687 if (tag >= nq->queue_depth)
688 return -1U;
689 } while (test_and_set_bit_lock(tag, nq->tag_map));
690
691 return tag;
692}
693
694static void free_cmd(struct nullb_cmd *cmd)
695{
696 put_tag(cmd->nq, cmd->tag);
697}
698
699static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
700
701static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
702{
703 struct nullb_cmd *cmd;
704 unsigned int tag;
705
706 tag = get_tag(nq);
707 if (tag != -1U) {
708 cmd = &nq->cmds[tag];
709 cmd->tag = tag;
710 cmd->error = BLK_STS_OK;
711 cmd->nq = nq;
712 if (nq->dev->irqmode == NULL_IRQ_TIMER) {
713 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
714 HRTIMER_MODE_REL);
715 cmd->timer.function = null_cmd_timer_expired;
716 }
717 return cmd;
718 }
719
720 return NULL;
721}
722
723static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
724{
725 struct nullb_cmd *cmd;
726 DEFINE_WAIT(wait);
727
728 do {
		/*
		 * Grab a free tag if one is available; otherwise sleep on the
		 * queue's waitqueue until put_tag() frees one and wakes us.
		 */
733 cmd = __alloc_cmd(nq);
734 if (cmd) {
735 cmd->bio = bio;
736 return cmd;
737 }
738 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
739 io_schedule();
740 finish_wait(&nq->wait, &wait);
741 } while (1);
742}
743
744static void end_cmd(struct nullb_cmd *cmd)
745{
746 int queue_mode = cmd->nq->dev->queue_mode;
747
748 switch (queue_mode) {
749 case NULL_Q_MQ:
750 blk_mq_end_request(cmd->rq, cmd->error);
751 return;
752 case NULL_Q_BIO:
753 cmd->bio->bi_status = cmd->error;
754 bio_endio(cmd->bio);
755 break;
756 }
757
758 free_cmd(cmd);
759}
760
761static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
762{
763 end_cmd(container_of(timer, struct nullb_cmd, timer));
764
765 return HRTIMER_NORESTART;
766}
767
768static void null_cmd_end_timer(struct nullb_cmd *cmd)
769{
770 ktime_t kt = cmd->nq->dev->completion_nsec;
771
772 hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
773}
774
775static void null_complete_rq(struct request *rq)
776{
777 end_cmd(blk_mq_rq_to_pdu(rq));
778}
779
780static struct nullb_page *null_alloc_page(void)
781{
782 struct nullb_page *t_page;
783
784 t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
785 if (!t_page)
786 return NULL;
787
788 t_page->page = alloc_pages(GFP_NOIO, 0);
789 if (!t_page->page) {
790 kfree(t_page);
791 return NULL;
792 }
793
794 memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
795 return t_page;
796}
797
798static void null_free_page(struct nullb_page *t_page)
799{
800 __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
801 if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
802 return;
803 __free_page(t_page->page);
804 kfree(t_page);
805}
806
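/*
 * A page holds no data when none of its sector bits are set; the top two
 * bits (NULLB_PAGE_FREE and NULLB_PAGE_LOCK) are control flags and ignored.
 */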
807static bool null_page_empty(struct nullb_page *page)
808{
809 int size = MAP_SZ - 2;
810
811 return find_first_bit(page->bitmap, size) == size;
812}
813
814static void null_free_sector(struct nullb *nullb, sector_t sector,
815 bool is_cache)
816{
817 unsigned int sector_bit;
818 u64 idx;
819 struct nullb_page *t_page, *ret;
820 struct radix_tree_root *root;
821
822 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
823 idx = sector >> PAGE_SECTORS_SHIFT;
824 sector_bit = (sector & SECTOR_MASK);
825
826 t_page = radix_tree_lookup(root, idx);
827 if (t_page) {
828 __clear_bit(sector_bit, t_page->bitmap);
829
830 if (null_page_empty(t_page)) {
831 ret = radix_tree_delete_item(root, idx, t_page);
832 WARN_ON(ret != t_page);
833 null_free_page(ret);
834 if (is_cache)
835 nullb->dev->curr_cache -= PAGE_SIZE;
836 }
837 }
838}
839
840static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
841 struct nullb_page *t_page, bool is_cache)
842{
843 struct radix_tree_root *root;
844
845 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
846
847 if (radix_tree_insert(root, idx, t_page)) {
848 null_free_page(t_page);
849 t_page = radix_tree_lookup(root, idx);
850 WARN_ON(!t_page || t_page->page->index != idx);
851 } else if (is_cache)
852 nullb->dev->curr_cache += PAGE_SIZE;
853
854 return t_page;
855}
856
857static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
858{
859 unsigned long pos = 0;
860 int nr_pages;
861 struct nullb_page *ret, *t_pages[FREE_BATCH];
862 struct radix_tree_root *root;
863
864 root = is_cache ? &dev->cache : &dev->data;
865
866 do {
867 int i;
868
869 nr_pages = radix_tree_gang_lookup(root,
870 (void **)t_pages, pos, FREE_BATCH);
871
872 for (i = 0; i < nr_pages; i++) {
873 pos = t_pages[i]->page->index;
874 ret = radix_tree_delete_item(root, pos, t_pages[i]);
875 WARN_ON(ret != t_pages[i]);
876 null_free_page(ret);
877 }
878
879 pos++;
880 } while (nr_pages == FREE_BATCH);
881
882 if (is_cache)
883 dev->curr_cache = 0;
884}
885
886static struct nullb_page *__null_lookup_page(struct nullb *nullb,
887 sector_t sector, bool for_write, bool is_cache)
888{
889 unsigned int sector_bit;
890 u64 idx;
891 struct nullb_page *t_page;
892 struct radix_tree_root *root;
893
894 idx = sector >> PAGE_SECTORS_SHIFT;
895 sector_bit = (sector & SECTOR_MASK);
896
897 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
898 t_page = radix_tree_lookup(root, idx);
899 WARN_ON(t_page && t_page->page->index != idx);
900
901 if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
902 return t_page;
903
904 return NULL;
905}
906
907static struct nullb_page *null_lookup_page(struct nullb *nullb,
908 sector_t sector, bool for_write, bool ignore_cache)
909{
910 struct nullb_page *page = NULL;
911
912 if (!ignore_cache)
913 page = __null_lookup_page(nullb, sector, for_write, true);
914 if (page)
915 return page;
916 return __null_lookup_page(nullb, sector, for_write, false);
917}
918
919static struct nullb_page *null_insert_page(struct nullb *nullb,
920 sector_t sector, bool ignore_cache)
921 __releases(&nullb->lock)
922 __acquires(&nullb->lock)
923{
924 u64 idx;
925 struct nullb_page *t_page;
926
927 t_page = null_lookup_page(nullb, sector, true, ignore_cache);
928 if (t_page)
929 return t_page;
930
931 spin_unlock_irq(&nullb->lock);
932
933 t_page = null_alloc_page();
934 if (!t_page)
935 goto out_lock;
936
937 if (radix_tree_preload(GFP_NOIO))
938 goto out_freepage;
939
940 spin_lock_irq(&nullb->lock);
941 idx = sector >> PAGE_SECTORS_SHIFT;
942 t_page->page->index = idx;
943 t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
944 radix_tree_preload_end();
945
946 return t_page;
947out_freepage:
948 null_free_page(t_page);
949out_lock:
950 spin_lock_irq(&nullb->lock);
951 return null_lookup_page(nullb, sector, true, ignore_cache);
952}
953
954static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
955{
956 int i;
957 unsigned int offset;
958 u64 idx;
959 struct nullb_page *t_page, *ret;
960 void *dst, *src;
961
962 idx = c_page->page->index;
963
964 t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
965
966 __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
967 if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
968 null_free_page(c_page);
969 if (t_page && null_page_empty(t_page)) {
970 ret = radix_tree_delete_item(&nullb->dev->data,
971 idx, t_page);
972 null_free_page(t_page);
973 }
974 return 0;
975 }
976
977 if (!t_page)
978 return -ENOMEM;
979
980 src = kmap_atomic(c_page->page);
981 dst = kmap_atomic(t_page->page);
982
983 for (i = 0; i < PAGE_SECTORS;
984 i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
985 if (test_bit(i, c_page->bitmap)) {
986 offset = (i << SECTOR_SHIFT);
987 memcpy(dst + offset, src + offset,
988 nullb->dev->blocksize);
989 __set_bit(i, t_page->bitmap);
990 }
991 }
992
993 kunmap_atomic(dst);
994 kunmap_atomic(src);
995
996 ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
997 null_free_page(ret);
998 nullb->dev->curr_cache -= PAGE_SIZE;
999
1000 return 0;
1001}
1002
1003static int null_make_cache_space(struct nullb *nullb, unsigned long n)
1004{
1005 int i, err, nr_pages;
1006 struct nullb_page *c_pages[FREE_BATCH];
1007 unsigned long flushed = 0, one_round;
1008
1009again:
1010 if ((nullb->dev->cache_size * 1024 * 1024) >
1011 nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
1012 return 0;
1013
1014 nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
1015 (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
	/*
	 * null_flush_cache_page() could unlock before using the c_pages. To
	 * avoid a race, don't allow the pages to be freed while flushing.
	 */
1020 for (i = 0; i < nr_pages; i++) {
1021 nullb->cache_flush_pos = c_pages[i]->page->index;
		/*
		 * We found a page which is being flushed to disk by another
		 * thread.
		 */
1026 if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
1027 c_pages[i] = NULL;
1028 else
1029 __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
1030 }
1031
1032 one_round = 0;
1033 for (i = 0; i < nr_pages; i++) {
1034 if (c_pages[i] == NULL)
1035 continue;
1036 err = null_flush_cache_page(nullb, c_pages[i]);
1037 if (err)
1038 return err;
1039 one_round++;
1040 }
1041 flushed += one_round << PAGE_SHIFT;
1042
1043 if (n > flushed) {
1044 if (nr_pages == 0)
1045 nullb->cache_flush_pos = 0;
1046 if (one_round == 0) {
			/* give other threads a chance */
1048 spin_unlock_irq(&nullb->lock);
1049 spin_lock_irq(&nullb->lock);
1050 }
1051 goto again;
1052 }
1053 return 0;
1054}
1055
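/*
 * Copy @n bytes from @source (starting at @off) into the backing store at
 * @sector. Writes go through the cache radix tree unless the cache is
 * disabled or the request is FUA, in which case they hit the data tree
 * directly and any stale cached copy of the sector is dropped.
 */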
1056static int copy_to_nullb(struct nullb *nullb, struct page *source,
1057 unsigned int off, sector_t sector, size_t n, bool is_fua)
1058{
1059 size_t temp, count = 0;
1060 unsigned int offset;
1061 struct nullb_page *t_page;
1062 void *dst, *src;
1063
1064 while (count < n) {
1065 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1066
1067 if (null_cache_active(nullb) && !is_fua)
1068 null_make_cache_space(nullb, PAGE_SIZE);
1069
1070 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
1071 t_page = null_insert_page(nullb, sector,
1072 !null_cache_active(nullb) || is_fua);
1073 if (!t_page)
1074 return -ENOSPC;
1075
1076 src = kmap_atomic(source);
1077 dst = kmap_atomic(t_page->page);
1078 memcpy(dst + offset, src + off + count, temp);
1079 kunmap_atomic(dst);
1080 kunmap_atomic(src);
1081
1082 __set_bit(sector & SECTOR_MASK, t_page->bitmap);
1083
1084 if (is_fua)
1085 null_free_sector(nullb, sector, true);
1086
1087 count += temp;
1088 sector += temp >> SECTOR_SHIFT;
1089 }
1090 return 0;
1091}
1092
1093static int copy_from_nullb(struct nullb *nullb, struct page *dest,
1094 unsigned int off, sector_t sector, size_t n)
1095{
1096 size_t temp, count = 0;
1097 unsigned int offset;
1098 struct nullb_page *t_page;
1099 void *dst, *src;
1100
1101 while (count < n) {
1102 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1103
1104 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
1105 t_page = null_lookup_page(nullb, sector, false,
1106 !null_cache_active(nullb));
1107
1108 dst = kmap_atomic(dest);
1109 if (!t_page) {
1110 memset(dst + off + count, 0, temp);
1111 goto next;
1112 }
1113 src = kmap_atomic(t_page->page);
1114 memcpy(dst + off + count, src + offset, temp);
1115 kunmap_atomic(src);
1116next:
1117 kunmap_atomic(dst);
1118
1119 count += temp;
1120 sector += temp >> SECTOR_SHIFT;
1121 }
1122 return 0;
1123}
1124
1125static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
1126 unsigned int len, unsigned int off)
1127{
1128 void *dst;
1129
1130 dst = kmap_atomic(page);
1131 memset(dst + off, 0xFF, len);
1132 kunmap_atomic(dst);
1133}
1134
1135blk_status_t null_handle_discard(struct nullb_device *dev,
1136 sector_t sector, sector_t nr_sectors)
1137{
1138 struct nullb *nullb = dev->nullb;
1139 size_t n = nr_sectors << SECTOR_SHIFT;
1140 size_t temp;
1141
1142 spin_lock_irq(&nullb->lock);
1143 while (n > 0) {
1144 temp = min_t(size_t, n, dev->blocksize);
1145 null_free_sector(nullb, sector, false);
1146 if (null_cache_active(nullb))
1147 null_free_sector(nullb, sector, true);
1148 sector += temp >> SECTOR_SHIFT;
1149 n -= temp;
1150 }
1151 spin_unlock_irq(&nullb->lock);
1152
1153 return BLK_STS_OK;
1154}
1155
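/*
 * A flush simply writes the entire cache back to the data store and is a
 * no-op when the write-back cache is not enabled.
 */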
1156static int null_handle_flush(struct nullb *nullb)
1157{
1158 int err;
1159
1160 if (!null_cache_active(nullb))
1161 return 0;
1162
1163 spin_lock_irq(&nullb->lock);
1164 while (true) {
1165 err = null_make_cache_space(nullb,
1166 nullb->dev->cache_size * 1024 * 1024);
1167 if (err || nullb->dev->curr_cache == 0)
1168 break;
1169 }
1170
1171 WARN_ON(!radix_tree_empty(&nullb->dev->cache));
1172 spin_unlock_irq(&nullb->lock);
1173 return err;
1174}
1175
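/*
 * Transfer one bio_vec worth of data between @page and the backing store.
 * Reads beyond the valid read length of a zone are filled with a 0xFF
 * pattern instead of being copied.
 */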
1176static int null_transfer(struct nullb *nullb, struct page *page,
1177 unsigned int len, unsigned int off, bool is_write, sector_t sector,
1178 bool is_fua)
1179{
1180 struct nullb_device *dev = nullb->dev;
1181 unsigned int valid_len = len;
1182 int err = 0;
1183
1184 if (!is_write) {
1185 if (dev->zoned)
1186 valid_len = null_zone_valid_read_len(nullb,
1187 sector, len);
1188
1189 if (valid_len) {
1190 err = copy_from_nullb(nullb, page, off,
1191 sector, valid_len);
1192 off += valid_len;
1193 len -= valid_len;
1194 }
1195
1196 if (len)
1197 nullb_fill_pattern(nullb, page, len, off);
1198 flush_dcache_page(page);
1199 } else {
1200 flush_dcache_page(page);
1201 err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
1202 }
1203
1204 return err;
1205}
1206
1207static int null_handle_rq(struct nullb_cmd *cmd)
1208{
1209 struct request *rq = cmd->rq;
1210 struct nullb *nullb = cmd->nq->dev->nullb;
1211 int err;
1212 unsigned int len;
1213 sector_t sector = blk_rq_pos(rq);
1214 struct req_iterator iter;
1215 struct bio_vec bvec;
1216
1217 spin_lock_irq(&nullb->lock);
1218 rq_for_each_segment(bvec, rq, iter) {
1219 len = bvec.bv_len;
1220 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
1221 op_is_write(req_op(rq)), sector,
1222 rq->cmd_flags & REQ_FUA);
1223 if (err) {
1224 spin_unlock_irq(&nullb->lock);
1225 return err;
1226 }
1227 sector += len >> SECTOR_SHIFT;
1228 }
1229 spin_unlock_irq(&nullb->lock);
1230
1231 return 0;
1232}
1233
1234static int null_handle_bio(struct nullb_cmd *cmd)
1235{
1236 struct bio *bio = cmd->bio;
1237 struct nullb *nullb = cmd->nq->dev->nullb;
1238 int err;
1239 unsigned int len;
1240 sector_t sector = bio->bi_iter.bi_sector;
1241 struct bio_vec bvec;
1242 struct bvec_iter iter;
1243
1244 spin_lock_irq(&nullb->lock);
1245 bio_for_each_segment(bvec, bio, iter) {
1246 len = bvec.bv_len;
1247 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
1248 op_is_write(bio_op(bio)), sector,
1249 bio->bi_opf & REQ_FUA);
1250 if (err) {
1251 spin_unlock_irq(&nullb->lock);
1252 return err;
1253 }
1254 sector += len >> SECTOR_SHIFT;
1255 }
1256 spin_unlock_irq(&nullb->lock);
1257 return 0;
1258}
1259
1260static void null_stop_queue(struct nullb *nullb)
1261{
1262 struct request_queue *q = nullb->q;
1263
1264 if (nullb->dev->queue_mode == NULL_Q_MQ)
1265 blk_mq_stop_hw_queues(q);
1266}
1267
1268static void null_restart_queue_async(struct nullb *nullb)
1269{
1270 struct request_queue *q = nullb->q;
1271
1272 if (nullb->dev->queue_mode == NULL_Q_MQ)
1273 blk_mq_start_stopped_hw_queues(q, true);
1274}
1275
1276static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
1277{
1278 struct nullb_device *dev = cmd->nq->dev;
1279 struct nullb *nullb = dev->nullb;
1280 blk_status_t sts = BLK_STS_OK;
1281 struct request *rq = cmd->rq;
1282
1283 if (!hrtimer_active(&nullb->bw_timer))
1284 hrtimer_restart(&nullb->bw_timer);
1285
1286 if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
1287 null_stop_queue(nullb);
1288
1289 if (atomic_long_read(&nullb->cur_bytes) > 0)
1290 null_restart_queue_async(nullb);
1291
1292 sts = BLK_STS_DEV_RESOURCE;
1293 }
1294 return sts;
1295}
1296
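/*
 * Fail the whole command with an I/O error if any part of it overlaps the
 * configured bad-block ranges.
 */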
1297static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
1298 sector_t sector,
1299 sector_t nr_sectors)
1300{
1301 struct badblocks *bb = &cmd->nq->dev->badblocks;
1302 sector_t first_bad;
1303 int bad_sectors;
1304
1305 if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
1306 return BLK_STS_IOERR;
1307
1308 return BLK_STS_OK;
1309}
1310
1311static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
1312 enum req_opf op,
1313 sector_t sector,
1314 sector_t nr_sectors)
1315{
1316 struct nullb_device *dev = cmd->nq->dev;
1317 int err;
1318
1319 if (op == REQ_OP_DISCARD)
1320 return null_handle_discard(dev, sector, nr_sectors);
1321
1322 if (dev->queue_mode == NULL_Q_BIO)
1323 err = null_handle_bio(cmd);
1324 else
1325 err = null_handle_rq(cmd);
1326
1327 return errno_to_blk_status(err);
1328}
1329
1330static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
1331{
1332 struct nullb_device *dev = cmd->nq->dev;
1333 struct bio *bio;
1334
1335 if (dev->memory_backed)
1336 return;
1337
1338 if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
1339 zero_fill_bio(cmd->bio);
1340 } else if (req_op(cmd->rq) == REQ_OP_READ) {
1341 __rq_for_each_bio(bio, cmd->rq)
1342 zero_fill_bio(bio);
1343 }
1344}
1345
1346static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
1347{
	/*
	 * Since root privileges are required to configure the null_blk
	 * driver, it is fine that this driver does not initialize the
	 * data buffers of read commands. Zero-initialize these buffers
	 * anyway if KMSAN is enabled to prevent that KMSAN complains
	 * about null_blk not initializing read data buffers.
	 */
1355 if (IS_ENABLED(CONFIG_KMSAN))
1356 nullb_zero_read_cmd_buffer(cmd);
1357
	/* Complete IO by inline, softirq or timer */
1359 switch (cmd->nq->dev->irqmode) {
1360 case NULL_IRQ_SOFTIRQ:
1361 switch (cmd->nq->dev->queue_mode) {
1362 case NULL_Q_MQ:
1363 if (likely(!blk_should_fake_timeout(cmd->rq->q)))
1364 blk_mq_complete_request(cmd->rq);
1365 break;
1366 case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
1370 end_cmd(cmd);
1371 break;
1372 }
1373 break;
1374 case NULL_IRQ_NONE:
1375 end_cmd(cmd);
1376 break;
1377 case NULL_IRQ_TIMER:
1378 null_cmd_end_timer(cmd);
1379 break;
1380 }
1381}
1382
1383blk_status_t null_process_cmd(struct nullb_cmd *cmd,
1384 enum req_opf op, sector_t sector,
1385 unsigned int nr_sectors)
1386{
1387 struct nullb_device *dev = cmd->nq->dev;
1388 blk_status_t ret;
1389
1390 if (dev->badblocks.shift != -1) {
1391 ret = null_handle_badblocks(cmd, sector, nr_sectors);
1392 if (ret != BLK_STS_OK)
1393 return ret;
1394 }
1395
1396 if (dev->memory_backed)
1397 return null_handle_memory_backed(cmd, op, sector, nr_sectors);
1398
1399 return BLK_STS_OK;
1400}
1401
1402static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
1403 sector_t nr_sectors, enum req_opf op)
1404{
1405 struct nullb_device *dev = cmd->nq->dev;
1406 struct nullb *nullb = dev->nullb;
1407 blk_status_t sts;
1408
1409 if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
1410 sts = null_handle_throttled(cmd);
1411 if (sts != BLK_STS_OK)
1412 return sts;
1413 }
1414
1415 if (op == REQ_OP_FLUSH) {
1416 cmd->error = errno_to_blk_status(null_handle_flush(nullb));
1417 goto out;
1418 }
1419
1420 if (dev->zoned)
1421 sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
1422 else
1423 sts = null_process_cmd(cmd, op, sector, nr_sectors);
1424
	/* Do not overwrite errors (e.g. timeout errors) */
1426 if (cmd->error == BLK_STS_OK)
1427 cmd->error = sts;
1428
1429out:
1430 nullb_complete_cmd(cmd);
1431 return BLK_STS_OK;
1432}
1433
1434static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
1435{
1436 struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
1437 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1438 unsigned int mbps = nullb->dev->mbps;
1439
1440 if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
1441 return HRTIMER_NORESTART;
1442
1443 atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
1444 null_restart_queue_async(nullb);
1445
1446 hrtimer_forward_now(&nullb->bw_timer, timer_interval);
1447
1448 return HRTIMER_RESTART;
1449}
1450
1451static void nullb_setup_bwtimer(struct nullb *nullb)
1452{
1453 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1454
1455 hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1456 nullb->bw_timer.function = nullb_bwtimer_fn;
1457 atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
1458 hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
1459}
1460
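/*
 * In bio queue mode, spread submitting CPUs evenly over the available
 * queues: each queue serves a contiguous block of CPU ids.
 */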
1461static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
1462{
1463 int index = 0;
1464
1465 if (nullb->nr_queues != 1)
1466 index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
1467
1468 return &nullb->queues[index];
1469}
1470
1471static void null_submit_bio(struct bio *bio)
1472{
1473 sector_t sector = bio->bi_iter.bi_sector;
1474 sector_t nr_sectors = bio_sectors(bio);
1475 struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
1476 struct nullb_queue *nq = nullb_to_queue(nullb);
1477
1478 null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
1479}
1480
1481static bool should_timeout_request(struct request *rq)
1482{
1483#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1484 if (g_timeout_str[0])
1485 return should_fail(&null_timeout_attr, 1);
1486#endif
1487 return false;
1488}
1489
1490static bool should_requeue_request(struct request *rq)
1491{
1492#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1493 if (g_requeue_str[0])
1494 return should_fail(&null_requeue_attr, 1);
1495#endif
1496 return false;
1497}
1498
1499static int null_map_queues(struct blk_mq_tag_set *set)
1500{
1501 struct nullb *nullb = set->driver_data;
1502 int i, qoff;
1503 unsigned int submit_queues = g_submit_queues;
1504 unsigned int poll_queues = g_poll_queues;
1505
1506 if (nullb) {
1507 struct nullb_device *dev = nullb->dev;
1508
		/*
		 * Refer to nr_hw_queues of the tag set to check if the
		 * expected number of hardware queues are prepared. If the
		 * block layer failed to prepare them, use the previous
		 * numbers of submit queues and poll queues to map queues.
		 */
1515 if (set->nr_hw_queues ==
1516 dev->submit_queues + dev->poll_queues) {
1517 submit_queues = dev->submit_queues;
1518 poll_queues = dev->poll_queues;
1519 } else if (set->nr_hw_queues ==
1520 dev->prev_submit_queues + dev->prev_poll_queues) {
1521 submit_queues = dev->prev_submit_queues;
1522 poll_queues = dev->prev_poll_queues;
1523 } else {
1524 pr_warn("tag set has unexpected nr_hw_queues: %d\n",
1525 set->nr_hw_queues);
1526 return -EINVAL;
1527 }
1528 }
1529
1530 for (i = 0, qoff = 0; i < set->nr_maps; i++) {
1531 struct blk_mq_queue_map *map = &set->map[i];
1532
1533 switch (i) {
1534 case HCTX_TYPE_DEFAULT:
1535 map->nr_queues = submit_queues;
1536 break;
1537 case HCTX_TYPE_READ:
1538 map->nr_queues = 0;
1539 continue;
1540 case HCTX_TYPE_POLL:
1541 map->nr_queues = poll_queues;
1542 break;
1543 }
1544 map->queue_offset = qoff;
1545 qoff += map->nr_queues;
1546 blk_mq_map_queues(map);
1547 }
1548
1549 return 0;
1550}
1551
1552static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
1553{
1554 struct nullb_queue *nq = hctx->driver_data;
1555 LIST_HEAD(list);
1556 int nr = 0;
1557
1558 spin_lock(&nq->poll_lock);
1559 list_splice_init(&nq->poll_list, &list);
1560 spin_unlock(&nq->poll_lock);
1561
1562 while (!list_empty(&list)) {
1563 struct nullb_cmd *cmd;
1564 struct request *req;
1565
1566 req = list_first_entry(&list, struct request, queuelist);
1567 list_del_init(&req->queuelist);
1568 cmd = blk_mq_rq_to_pdu(req);
1569 cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
1570 blk_rq_sectors(req));
1571 if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
1572 blk_mq_end_request_batch))
1573 end_cmd(cmd);
1574 nr++;
1575 }
1576
1577 return nr;
1578}
1579
1580static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
1581{
1582 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
1583 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1584
1585 pr_info("rq %p timed out\n", rq);
1586
1587 if (hctx->type == HCTX_TYPE_POLL) {
1588 struct nullb_queue *nq = hctx->driver_data;
1589
1590 spin_lock(&nq->poll_lock);
1591 list_del_init(&rq->queuelist);
1592 spin_unlock(&nq->poll_lock);
1593 }
1594
	/*
	 * If the device is marked as blocking (i.e. memory backed or zoned
	 * device), the submission path may be blocked waiting for resources
	 * and cause real timeouts. For these real timeouts, the submission
	 * path will complete the request using blk_mq_complete_request().
	 * Only fake timeouts need to execute blk_mq_complete_request() here.
	 */
1602 cmd->error = BLK_STS_TIMEOUT;
1603 if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL)
1604 blk_mq_complete_request(rq);
1605 return BLK_EH_DONE;
1606}
1607
1608static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
1609 const struct blk_mq_queue_data *bd)
1610{
1611 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
1612 struct nullb_queue *nq = hctx->driver_data;
1613 sector_t nr_sectors = blk_rq_sectors(bd->rq);
1614 sector_t sector = blk_rq_pos(bd->rq);
1615 const bool is_poll = hctx->type == HCTX_TYPE_POLL;
1616
1617 might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
1618
1619 if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
1620 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1621 cmd->timer.function = null_cmd_timer_expired;
1622 }
1623 cmd->rq = bd->rq;
1624 cmd->error = BLK_STS_OK;
1625 cmd->nq = nq;
1626 cmd->fake_timeout = should_timeout_request(bd->rq);
1627
1628 blk_mq_start_request(bd->rq);
1629
1630 if (should_requeue_request(bd->rq)) {
		/*
		 * Alternate between hitting the core BUSY path, and the
		 * driver driven requeue path.
		 */
1635 nq->requeue_selection++;
1636 if (nq->requeue_selection & 1)
1637 return BLK_STS_RESOURCE;
1638 else {
1639 blk_mq_requeue_request(bd->rq, true);
1640 return BLK_STS_OK;
1641 }
1642 }
1643
1644 if (is_poll) {
1645 spin_lock(&nq->poll_lock);
1646 list_add_tail(&bd->rq->queuelist, &nq->poll_list);
1647 spin_unlock(&nq->poll_lock);
1648 return BLK_STS_OK;
1649 }
1650 if (cmd->fake_timeout)
1651 return BLK_STS_OK;
1652
1653 return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
1654}
1655
1656static void cleanup_queue(struct nullb_queue *nq)
1657{
1658 kfree(nq->tag_map);
1659 kfree(nq->cmds);
1660}
1661
1662static void cleanup_queues(struct nullb *nullb)
1663{
1664 int i;
1665
1666 for (i = 0; i < nullb->nr_queues; i++)
1667 cleanup_queue(&nullb->queues[i]);
1668
1669 kfree(nullb->queues);
1670}
1671
1672static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
1673{
1674 struct nullb_queue *nq = hctx->driver_data;
1675 struct nullb *nullb = nq->dev->nullb;
1676
1677 nullb->nr_queues--;
1678}
1679
1680static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
1681{
1682 init_waitqueue_head(&nq->wait);
1683 nq->queue_depth = nullb->queue_depth;
1684 nq->dev = nullb->dev;
1685 INIT_LIST_HEAD(&nq->poll_list);
1686 spin_lock_init(&nq->poll_lock);
1687}
1688
1689static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
1690 unsigned int hctx_idx)
1691{
1692 struct nullb *nullb = hctx->queue->queuedata;
1693 struct nullb_queue *nq;
1694
1695#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1696 if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
1697 return -EFAULT;
1698#endif
1699
1700 nq = &nullb->queues[hctx_idx];
1701 hctx->driver_data = nq;
1702 null_init_queue(nullb, nq);
1703 nullb->nr_queues++;
1704
1705 return 0;
1706}
1707
1708static const struct blk_mq_ops null_mq_ops = {
1709 .queue_rq = null_queue_rq,
1710 .complete = null_complete_rq,
1711 .timeout = null_timeout_rq,
1712 .poll = null_poll,
1713 .map_queues = null_map_queues,
1714 .init_hctx = null_init_hctx,
1715 .exit_hctx = null_exit_hctx,
1716};
1717
1718static void null_del_dev(struct nullb *nullb)
1719{
1720 struct nullb_device *dev;
1721
1722 if (!nullb)
1723 return;
1724
1725 dev = nullb->dev;
1726
1727 ida_simple_remove(&nullb_indexes, nullb->index);
1728
1729 list_del_init(&nullb->list);
1730
1731 del_gendisk(nullb->disk);
1732
1733 if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
1734 hrtimer_cancel(&nullb->bw_timer);
1735 atomic_long_set(&nullb->cur_bytes, LONG_MAX);
1736 null_restart_queue_async(nullb);
1737 }
1738
1739 blk_cleanup_disk(nullb->disk);
1740 if (dev->queue_mode == NULL_Q_MQ &&
1741 nullb->tag_set == &nullb->__tag_set)
1742 blk_mq_free_tag_set(nullb->tag_set);
1743 cleanup_queues(nullb);
1744 if (null_cache_active(nullb))
1745 null_free_device_storage(nullb->dev, true);
1746 kfree(nullb);
1747 dev->nullb = NULL;
1748}
1749
1750static void null_config_discard(struct nullb *nullb)
1751{
1752 if (nullb->dev->discard == false)
1753 return;
1754
1755 if (!nullb->dev->memory_backed) {
1756 nullb->dev->discard = false;
1757 pr_info("discard option is ignored without memory backing\n");
1758 return;
1759 }
1760
1761 if (nullb->dev->zoned) {
1762 nullb->dev->discard = false;
1763 pr_info("discard option is ignored in zoned mode\n");
1764 return;
1765 }
1766
1767 nullb->q->limits.discard_granularity = nullb->dev->blocksize;
1768 nullb->q->limits.discard_alignment = nullb->dev->blocksize;
1769 blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
1770 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
1771}
1772
1773static const struct block_device_operations null_bio_ops = {
1774 .owner = THIS_MODULE,
1775 .submit_bio = null_submit_bio,
1776 .report_zones = null_report_zones,
1777};
1778
1779static const struct block_device_operations null_rq_ops = {
1780 .owner = THIS_MODULE,
1781 .report_zones = null_report_zones,
1782};
1783
1784static int setup_commands(struct nullb_queue *nq)
1785{
1786 struct nullb_cmd *cmd;
1787 int i, tag_size;
1788
1789 nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
1790 if (!nq->cmds)
1791 return -ENOMEM;
1792
1793 tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
1794 nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
1795 if (!nq->tag_map) {
1796 kfree(nq->cmds);
1797 return -ENOMEM;
1798 }
1799
1800 for (i = 0; i < nq->queue_depth; i++) {
1801 cmd = &nq->cmds[i];
1802 cmd->tag = -1U;
1803 }
1804
1805 return 0;
1806}
1807
1808static int setup_queues(struct nullb *nullb)
1809{
1810 int nqueues = nr_cpu_ids;
1811
1812 if (g_poll_queues)
1813 nqueues += g_poll_queues;
1814
1815 nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue),
1816 GFP_KERNEL);
1817 if (!nullb->queues)
1818 return -ENOMEM;
1819
1820 nullb->queue_depth = nullb->dev->hw_queue_depth;
1821 return 0;
1822}
1823
1824static int init_driver_queues(struct nullb *nullb)
1825{
1826 struct nullb_queue *nq;
1827 int i, ret = 0;
1828
1829 for (i = 0; i < nullb->dev->submit_queues; i++) {
1830 nq = &nullb->queues[i];
1831
1832 null_init_queue(nullb, nq);
1833
1834 ret = setup_commands(nq);
1835 if (ret)
1836 return ret;
1837 nullb->nr_queues++;
1838 }
1839 return 0;
1840}
1841
1842static int null_gendisk_register(struct nullb *nullb)
1843{
1844 sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
1845 struct gendisk *disk = nullb->disk;
1846
1847 set_capacity(disk, size);
1848
1849 disk->major = null_major;
1850 disk->first_minor = nullb->index;
1851 disk->minors = 1;
1852 if (queue_is_mq(nullb->q))
1853 disk->fops = &null_rq_ops;
1854 else
1855 disk->fops = &null_bio_ops;
1856 disk->private_data = nullb;
1857 strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
1858
1859 if (nullb->dev->zoned) {
1860 int ret = null_register_zoned_dev(nullb);
1861
1862 if (ret)
1863 return ret;
1864 }
1865
1866 return add_disk(disk);
1867}
1868
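/*
 * Set up the blk-mq tag set. Poll queues, when configured, are counted on
 * top of the submit queues and get their own queue map (nr_maps == 3).
 */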
1869static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
1870{
1871 int poll_queues;
1872
1873 set->ops = &null_mq_ops;
1874 set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
1875 g_submit_queues;
1876 poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
1877 if (poll_queues)
1878 set->nr_hw_queues += poll_queues;
1879 set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
1880 g_hw_queue_depth;
1881 set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
1882 set->cmd_size = sizeof(struct nullb_cmd);
1883 set->flags = BLK_MQ_F_SHOULD_MERGE;
1884 if (g_no_sched)
1885 set->flags |= BLK_MQ_F_NO_SCHED;
1886 if (g_shared_tag_bitmap)
1887 set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
1888 set->driver_data = nullb;
1889 if (poll_queues)
1890 set->nr_maps = 3;
1891 else
1892 set->nr_maps = 1;
1893
1894 if ((nullb && nullb->dev->blocking) || g_blocking)
1895 set->flags |= BLK_MQ_F_BLOCKING;
1896
1897 return blk_mq_alloc_tag_set(set);
1898}
1899
1900static int null_validate_conf(struct nullb_device *dev)
1901{
1902 dev->blocksize = round_down(dev->blocksize, 512);
1903 dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
1904
1905 if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
1906 if (dev->submit_queues != nr_online_nodes)
1907 dev->submit_queues = nr_online_nodes;
1908 } else if (dev->submit_queues > nr_cpu_ids)
1909 dev->submit_queues = nr_cpu_ids;
1910 else if (dev->submit_queues == 0)
1911 dev->submit_queues = 1;
1912 dev->prev_submit_queues = dev->submit_queues;
1913
1914 if (dev->poll_queues > g_poll_queues)
1915 dev->poll_queues = g_poll_queues;
1916 dev->prev_poll_queues = dev->poll_queues;
1917
1918 dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
1919 dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
1920
	/* Do memory allocation, so set blocking */
1922 if (dev->memory_backed)
1923 dev->blocking = true;
1924 else
1925 dev->cache_size = 0;
1926 dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
1927 dev->cache_size);
1928 dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
1929
1930 if (dev->queue_mode == NULL_Q_BIO)
1931 dev->mbps = 0;
1932
1933 if (dev->zoned &&
1934 (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
1935 pr_err("zone_size must be power-of-two\n");
1936 return -EINVAL;
1937 }
1938
1939 return 0;
1940}
1941
1942#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1943static bool __null_setup_fault(struct fault_attr *attr, char *str)
1944{
1945 if (!str[0])
1946 return true;
1947
1948 if (!setup_fault_attr(attr, str))
1949 return false;
1950
1951 attr->verbose = 0;
1952 return true;
1953}
1954#endif
1955
1956static bool null_setup_fault(void)
1957{
1958#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1959 if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
1960 return false;
1961 if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
1962 return false;
1963 if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
1964 return false;
1965#endif
1966 return true;
1967}
1968
1969static int null_add_dev(struct nullb_device *dev)
1970{
1971 struct nullb *nullb;
1972 int rv;
1973
1974 rv = null_validate_conf(dev);
1975 if (rv)
1976 return rv;
1977
1978 nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
1979 if (!nullb) {
1980 rv = -ENOMEM;
1981 goto out;
1982 }
1983 nullb->dev = dev;
1984 dev->nullb = nullb;
1985
1986 spin_lock_init(&nullb->lock);
1987
1988 rv = setup_queues(nullb);
1989 if (rv)
1990 goto out_free_nullb;
1991
1992 if (dev->queue_mode == NULL_Q_MQ) {
1993 if (shared_tags) {
1994 nullb->tag_set = &tag_set;
1995 rv = 0;
1996 } else {
1997 nullb->tag_set = &nullb->__tag_set;
1998 rv = null_init_tag_set(nullb, nullb->tag_set);
1999 }
2000
2001 if (rv)
2002 goto out_cleanup_queues;
2003
2004 if (!null_setup_fault())
2005 goto out_cleanup_tags;
2006
2007 nullb->tag_set->timeout = 5 * HZ;
2008 nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
2009 if (IS_ERR(nullb->disk)) {
2010 rv = PTR_ERR(nullb->disk);
2011 goto out_cleanup_tags;
2012 }
2013 nullb->q = nullb->disk->queue;
2014 } else if (dev->queue_mode == NULL_Q_BIO) {
2015 rv = -ENOMEM;
2016 nullb->disk = blk_alloc_disk(nullb->dev->home_node);
2017 if (!nullb->disk)
2018 goto out_cleanup_queues;
2019
2020 nullb->q = nullb->disk->queue;
2021 rv = init_driver_queues(nullb);
2022 if (rv)
2023 goto out_cleanup_disk;
2024 }
2025
2026 if (dev->mbps) {
2027 set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
2028 nullb_setup_bwtimer(nullb);
2029 }
2030
2031 if (dev->cache_size > 0) {
2032 set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
2033 blk_queue_write_cache(nullb->q, true, true);
2034 }
2035
2036 if (dev->zoned) {
2037 rv = null_init_zoned_dev(dev, nullb->q);
2038 if (rv)
2039 goto out_cleanup_disk;
2040 }
2041
2042 nullb->q->queuedata = nullb;
2043 blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
2044 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
2045
2046 mutex_lock(&lock);
2047 nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
2048 dev->index = nullb->index;
2049 mutex_unlock(&lock);
2050
2051 blk_queue_logical_block_size(nullb->q, dev->blocksize);
2052 blk_queue_physical_block_size(nullb->q, dev->blocksize);
2053 if (!dev->max_sectors)
2054 dev->max_sectors = queue_max_hw_sectors(nullb->q);
2055 dev->max_sectors = min_t(unsigned int, dev->max_sectors,
2056 BLK_DEF_MAX_SECTORS);
2057 blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
2058
2059 if (dev->virt_boundary)
2060 blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
2061
2062 null_config_discard(nullb);
2063
2064 sprintf(nullb->disk_name, "nullb%d", nullb->index);
2065
2066 rv = null_gendisk_register(nullb);
2067 if (rv)
2068 goto out_cleanup_zone;
2069
2070 mutex_lock(&lock);
2071 list_add_tail(&nullb->list, &nullb_list);
2072 mutex_unlock(&lock);
2073
2074 return 0;
2075out_cleanup_zone:
2076 null_free_zoned_dev(dev);
2077out_cleanup_disk:
2078 blk_cleanup_disk(nullb->disk);
2079out_cleanup_tags:
2080 if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
2081 blk_mq_free_tag_set(nullb->tag_set);
2082out_cleanup_queues:
2083 cleanup_queues(nullb);
2084out_free_nullb:
2085 kfree(nullb);
2086 dev->nullb = NULL;
2087out:
2088 return rv;
2089}
2090
2091static int __init null_init(void)
2092{
2093 int ret = 0;
2094 unsigned int i;
2095 struct nullb *nullb;
2096 struct nullb_device *dev;
2097
2098 if (g_bs > PAGE_SIZE) {
2099 pr_warn("invalid block size\n");
2100 pr_warn("defaults block size to %lu\n", PAGE_SIZE);
2101 g_bs = PAGE_SIZE;
2102 }
2103
2104 if (g_max_sectors > BLK_DEF_MAX_SECTORS) {
2105 pr_warn("invalid max sectors\n");
2106 pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS);
2107 g_max_sectors = BLK_DEF_MAX_SECTORS;
2108 }
2109
2110 if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
2111 pr_err("invalid home_node value\n");
2112 g_home_node = NUMA_NO_NODE;
2113 }
2114
2115 if (g_queue_mode == NULL_Q_RQ) {
2116 pr_err("legacy IO path no longer available\n");
2117 return -EINVAL;
2118 }
2119 if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
2120 if (g_submit_queues != nr_online_nodes) {
2121 pr_warn("submit_queues param is set to %u.\n",
2122 nr_online_nodes);
2123 g_submit_queues = nr_online_nodes;
2124 }
2125 } else if (g_submit_queues > nr_cpu_ids)
2126 g_submit_queues = nr_cpu_ids;
2127 else if (g_submit_queues <= 0)
2128 g_submit_queues = 1;
2129
2130 if (g_queue_mode == NULL_Q_MQ && shared_tags) {
2131 ret = null_init_tag_set(NULL, &tag_set);
2132 if (ret)
2133 return ret;
2134 }
2135
2136 config_group_init(&nullb_subsys.su_group);
2137 mutex_init(&nullb_subsys.su_mutex);
2138
2139 ret = configfs_register_subsystem(&nullb_subsys);
2140 if (ret)
2141 goto err_tagset;
2142
2143 mutex_init(&lock);
2144
2145 null_major = register_blkdev(0, "nullb");
2146 if (null_major < 0) {
2147 ret = null_major;
2148 goto err_conf;
2149 }
2150
2151 for (i = 0; i < nr_devices; i++) {
2152 dev = null_alloc_dev();
2153 if (!dev) {
2154 ret = -ENOMEM;
2155 goto err_dev;
2156 }
2157 ret = null_add_dev(dev);
2158 if (ret) {
2159 null_free_dev(dev);
2160 goto err_dev;
2161 }
2162 }
2163
2164 pr_info("module loaded\n");
2165 return 0;
2166
2167err_dev:
2168 while (!list_empty(&nullb_list)) {
2169 nullb = list_entry(nullb_list.next, struct nullb, list);
2170 dev = nullb->dev;
2171 null_del_dev(nullb);
2172 null_free_dev(dev);
2173 }
2174 unregister_blkdev(null_major, "nullb");
2175err_conf:
2176 configfs_unregister_subsystem(&nullb_subsys);
2177err_tagset:
2178 if (g_queue_mode == NULL_Q_MQ && shared_tags)
2179 blk_mq_free_tag_set(&tag_set);
2180 return ret;
2181}
2182
2183static void __exit null_exit(void)
2184{
2185 struct nullb *nullb;
2186
2187 configfs_unregister_subsystem(&nullb_subsys);
2188
2189 unregister_blkdev(null_major, "nullb");
2190
2191 mutex_lock(&lock);
2192 while (!list_empty(&nullb_list)) {
2193 struct nullb_device *dev;
2194
2195 nullb = list_entry(nullb_list.next, struct nullb, list);
2196 dev = nullb->dev;
2197 null_del_dev(nullb);
2198 null_free_dev(dev);
2199 }
2200 mutex_unlock(&lock);
2201
2202 if (g_queue_mode == NULL_Q_MQ && shared_tags)
2203 blk_mq_free_tag_set(&tag_set);
2204}
2205
2206module_init(null_init);
2207module_exit(null_exit);
2208
2209MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
2210MODULE_LICENSE("GPL");
2211