1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "dm.h"
25#include "dm-path-selector.h"
26
27#include <linux/blkdev.h>
28#include <linux/slab.h>
29#include <linux/module.h>
30
31
32#define DM_MSG_PREFIX "multipath historical-service-time"
33#define HST_MIN_IO 1
34#define HST_VERSION "0.1.1"
35
36#define HST_FIXED_SHIFT 10
37#define HST_FIXED_MAX (ULLONG_MAX >> HST_FIXED_SHIFT)
38#define HST_FIXED_1 (1 << HST_FIXED_SHIFT)
39#define HST_FIXED_95 972
40#define HST_MAX_INFLIGHT HST_FIXED_1
41#define HST_BUCKET_SHIFT 24
42#define HST_WEIGHT_COUNT 64ULL
43
44struct selector {
45 struct list_head valid_paths;
46 struct list_head failed_paths;
47 int valid_count;
48 spinlock_t lock;
49
50 unsigned int weights[HST_WEIGHT_COUNT];
51 unsigned int threshold_multiplier;
52};
53
54struct path_info {
55 struct list_head list;
56 struct dm_path *path;
57 unsigned int repeat_count;
58
59 spinlock_t lock;
60
61 u64 historical_service_time;
62
63 u64 stale_after;
64 u64 last_finish;
65
66 u64 outstanding;
67};
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86static u64 fixed_power(u64 x, unsigned int frac_bits, unsigned int n)
87{
88 unsigned long result = 1UL << frac_bits;
89
90 if (n) {
91 for (;;) {
92 if (n & 1) {
93 result *= x;
94 result += 1UL << (frac_bits - 1);
95 result >>= frac_bits;
96 }
97 n >>= 1;
98 if (!n)
99 break;
100 x *= x;
101 x += 1UL << (frac_bits - 1);
102 x >>= frac_bits;
103 }
104 }
105
106 return result;
107}
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static u64 fixed_ema(u64 last, u64 next, u64 weight)
123{
124 last *= weight;
125 last += next * (HST_FIXED_1 - weight);
126 last += 1ULL << (HST_FIXED_SHIFT - 1);
127 return last >> HST_FIXED_SHIFT;
128}
129
130static struct selector *alloc_selector(void)
131{
132 struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
133
134 if (s) {
135 INIT_LIST_HEAD(&s->valid_paths);
136 INIT_LIST_HEAD(&s->failed_paths);
137 spin_lock_init(&s->lock);
138 s->valid_count = 0;
139 }
140
141 return s;
142}
143
144
145
146
147static u64 hst_weight(struct path_selector *ps, u64 delta)
148{
149 struct selector *s = ps->context;
150 int bucket = clamp(delta >> HST_BUCKET_SHIFT, 0ULL,
151 HST_WEIGHT_COUNT - 1);
152
153 return s->weights[bucket];
154}
155
156
157
158
159
160
161
162static void hst_set_weights(struct path_selector *ps, unsigned int base)
163{
164 struct selector *s = ps->context;
165 int i;
166
167 if (base >= HST_FIXED_1)
168 return;
169
170 for (i = 0; i < HST_WEIGHT_COUNT - 1; i++)
171 s->weights[i] = fixed_power(base, HST_FIXED_SHIFT, i + 1);
172 s->weights[HST_WEIGHT_COUNT - 1] = 0;
173}
174
175static int hst_create(struct path_selector *ps, unsigned int argc, char **argv)
176{
177 struct selector *s;
178 unsigned int base_weight = HST_FIXED_95;
179 unsigned int threshold_multiplier = 0;
180 char dummy;
181
182
183
184
185
186
187
188
189
190
191
192
193 if (argc > 2)
194 return -EINVAL;
195
196 if (argc && (sscanf(argv[0], "%u%c", &base_weight, &dummy) != 1 ||
197 base_weight >= HST_FIXED_1)) {
198 return -EINVAL;
199 }
200
201 if (argc > 1 && (sscanf(argv[1], "%u%c",
202 &threshold_multiplier, &dummy) != 1)) {
203 return -EINVAL;
204 }
205
206 s = alloc_selector();
207 if (!s)
208 return -ENOMEM;
209
210 ps->context = s;
211
212 hst_set_weights(ps, base_weight);
213 s->threshold_multiplier = threshold_multiplier;
214 return 0;
215}
216
217static void free_paths(struct list_head *paths)
218{
219 struct path_info *pi, *next;
220
221 list_for_each_entry_safe(pi, next, paths, list) {
222 list_del(&pi->list);
223 kfree(pi);
224 }
225}
226
227static void hst_destroy(struct path_selector *ps)
228{
229 struct selector *s = ps->context;
230
231 free_paths(&s->valid_paths);
232 free_paths(&s->failed_paths);
233 kfree(s);
234 ps->context = NULL;
235}
236
237static int hst_status(struct path_selector *ps, struct dm_path *path,
238 status_type_t type, char *result, unsigned int maxlen)
239{
240 unsigned int sz = 0;
241 struct path_info *pi;
242
243 if (!path) {
244 struct selector *s = ps->context;
245
246 DMEMIT("2 %u %u ", s->weights[0], s->threshold_multiplier);
247 } else {
248 pi = path->pscontext;
249
250 switch (type) {
251 case STATUSTYPE_INFO:
252 DMEMIT("%llu %llu %llu ", pi->historical_service_time,
253 pi->outstanding, pi->stale_after);
254 break;
255 case STATUSTYPE_TABLE:
256 DMEMIT("0 ");
257 break;
258 }
259 }
260
261 return sz;
262}
263
264static int hst_add_path(struct path_selector *ps, struct dm_path *path,
265 int argc, char **argv, char **error)
266{
267 struct selector *s = ps->context;
268 struct path_info *pi;
269 unsigned int repeat_count = HST_MIN_IO;
270 char dummy;
271 unsigned long flags;
272
273
274
275
276
277
278 if (argc > 1) {
279 *error = "historical-service-time ps: incorrect number of arguments";
280 return -EINVAL;
281 }
282
283 if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
284 *error = "historical-service-time ps: invalid repeat count";
285 return -EINVAL;
286 }
287
288
289 pi = kmalloc(sizeof(*pi), GFP_KERNEL);
290 if (!pi) {
291 *error = "historical-service-time ps: Error allocating path context";
292 return -ENOMEM;
293 }
294
295 pi->path = path;
296 pi->repeat_count = repeat_count;
297
298 pi->historical_service_time = HST_FIXED_1;
299
300 spin_lock_init(&pi->lock);
301 pi->outstanding = 0;
302
303 pi->stale_after = 0;
304 pi->last_finish = 0;
305
306 path->pscontext = pi;
307
308 spin_lock_irqsave(&s->lock, flags);
309 list_add_tail(&pi->list, &s->valid_paths);
310 s->valid_count++;
311 spin_unlock_irqrestore(&s->lock, flags);
312
313 return 0;
314}
315
316static void hst_fail_path(struct path_selector *ps, struct dm_path *path)
317{
318 struct selector *s = ps->context;
319 struct path_info *pi = path->pscontext;
320 unsigned long flags;
321
322 spin_lock_irqsave(&s->lock, flags);
323 list_move(&pi->list, &s->failed_paths);
324 s->valid_count--;
325 spin_unlock_irqrestore(&s->lock, flags);
326}
327
328static int hst_reinstate_path(struct path_selector *ps, struct dm_path *path)
329{
330 struct selector *s = ps->context;
331 struct path_info *pi = path->pscontext;
332 unsigned long flags;
333
334 spin_lock_irqsave(&s->lock, flags);
335 list_move_tail(&pi->list, &s->valid_paths);
336 s->valid_count++;
337 spin_unlock_irqrestore(&s->lock, flags);
338
339 return 0;
340}
341
342static void hst_fill_compare(struct path_info *pi, u64 *hst,
343 u64 *out, u64 *stale)
344{
345 unsigned long flags;
346
347 spin_lock_irqsave(&pi->lock, flags);
348 *hst = pi->historical_service_time;
349 *out = pi->outstanding;
350 *stale = pi->stale_after;
351 spin_unlock_irqrestore(&pi->lock, flags);
352}
353
354
355
356
357
358
359
360
361
362
363
364static long long hst_compare(struct path_info *pi1, struct path_info *pi2,
365 u64 time_now, struct path_selector *ps)
366{
367 struct selector *s = ps->context;
368 u64 hst1, hst2;
369 long long out1, out2, stale1, stale2;
370 int pi2_better, over_threshold;
371
372 hst_fill_compare(pi1, &hst1, &out1, &stale1);
373 hst_fill_compare(pi2, &hst2, &out2, &stale2);
374
375
376
377
378
379
380 if (hst1 > hst2)
381 over_threshold = hst1 > (s->threshold_multiplier * hst2);
382 else
383 over_threshold = hst2 > (s->threshold_multiplier * hst1);
384
385 if (!over_threshold)
386 return out1 - out2;
387
388
389
390
391
392
393 if ((!out1 && stale1 < time_now) || (!out2 && stale2 < time_now) ||
394 (!out1 && !out2))
395 return (!out2 * stale1) - (!out1 * stale2);
396
397
398
399
400 if (out1 == out2) {
401 pi2_better = hst1 > hst2;
402 } else {
403
404 if (unlikely(out1 >= HST_MAX_INFLIGHT ||
405 out2 >= HST_MAX_INFLIGHT)) {
406
407
408
409
410 hst1 >>= HST_FIXED_SHIFT;
411 hst2 >>= HST_FIXED_SHIFT;
412 }
413 pi2_better = (1 + out1) * hst1 > (1 + out2) * hst2;
414 }
415
416
417 if (pi2_better) {
418 if (stale2 < time_now)
419 return out1 - out2;
420 return 1;
421 }
422 if (stale1 < time_now)
423 return out1 - out2;
424 return -1;
425}
426
427static struct dm_path *hst_select_path(struct path_selector *ps,
428 size_t nr_bytes)
429{
430 struct selector *s = ps->context;
431 struct path_info *pi = NULL, *best = NULL;
432 u64 time_now = sched_clock();
433 struct dm_path *ret = NULL;
434 unsigned long flags;
435
436 spin_lock_irqsave(&s->lock, flags);
437 if (list_empty(&s->valid_paths))
438 goto out;
439
440 list_for_each_entry(pi, &s->valid_paths, list) {
441 if (!best || (hst_compare(pi, best, time_now, ps) < 0))
442 best = pi;
443 }
444
445 if (!best)
446 goto out;
447
448
449 list_move_tail(&best->list, &s->valid_paths);
450
451 ret = best->path;
452
453out:
454 spin_unlock_irqrestore(&s->lock, flags);
455 return ret;
456}
457
458static int hst_start_io(struct path_selector *ps, struct dm_path *path,
459 size_t nr_bytes)
460{
461 struct path_info *pi = path->pscontext;
462 unsigned long flags;
463
464 spin_lock_irqsave(&pi->lock, flags);
465 pi->outstanding++;
466 spin_unlock_irqrestore(&pi->lock, flags);
467
468 return 0;
469}
470
471static u64 path_service_time(struct path_info *pi, u64 start_time)
472{
473 u64 sched_now = ktime_get_ns();
474
475
476
477
478
479 if (time_after64(pi->last_finish, start_time))
480 start_time = pi->last_finish;
481
482 pi->last_finish = sched_now;
483 if (time_before64(sched_now, start_time))
484 return 0;
485
486 return sched_now - start_time;
487}
488
489static int hst_end_io(struct path_selector *ps, struct dm_path *path,
490 size_t nr_bytes, u64 start_time)
491{
492 struct path_info *pi = path->pscontext;
493 struct selector *s = ps->context;
494 unsigned long flags;
495 u64 st;
496
497 spin_lock_irqsave(&pi->lock, flags);
498
499 st = path_service_time(pi, start_time);
500 pi->outstanding--;
501 pi->historical_service_time =
502 fixed_ema(pi->historical_service_time,
503 min(st * HST_FIXED_1, HST_FIXED_MAX),
504 hst_weight(ps, st));
505
506
507
508
509
510
511
512 pi->stale_after = pi->last_finish +
513 (s->valid_count * (pi->historical_service_time >> HST_FIXED_SHIFT));
514
515 spin_unlock_irqrestore(&pi->lock, flags);
516
517 return 0;
518}
519
520static struct path_selector_type hst_ps = {
521 .name = "historical-service-time",
522 .module = THIS_MODULE,
523 .table_args = 1,
524 .info_args = 3,
525 .create = hst_create,
526 .destroy = hst_destroy,
527 .status = hst_status,
528 .add_path = hst_add_path,
529 .fail_path = hst_fail_path,
530 .reinstate_path = hst_reinstate_path,
531 .select_path = hst_select_path,
532 .start_io = hst_start_io,
533 .end_io = hst_end_io,
534};
535
536static int __init dm_hst_init(void)
537{
538 int r = dm_register_path_selector(&hst_ps);
539
540 if (r < 0)
541 DMERR("register failed %d", r);
542
543 DMINFO("version " HST_VERSION " loaded");
544
545 return r;
546}
547
548static void __exit dm_hst_exit(void)
549{
550 int r = dm_unregister_path_selector(&hst_ps);
551
552 if (r < 0)
553 DMERR("unregister failed %d", r);
554}
555
556module_init(dm_hst_init);
557module_exit(dm_hst_exit);
558
559MODULE_DESCRIPTION(DM_NAME " measured service time oriented path selector");
560MODULE_AUTHOR("Khazhismel Kumykov <khazhy@google.com>");
561MODULE_LICENSE("GPL");
562