/* net/sched/sch_hhf.c	Heavy-Hitter Filter (HHF)
 *
 * Copyright (C) 2013 Terry Lam
 * Copyright (C) 2013 Nandita Dukkipati
 */

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/siphash.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

/*	Heavy-Hitter Filter (HHF)
 *
 * Principles :
 * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter
 * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified
 * as heavy-hitter, it is immediately switched to the heavy-hitter bucket.
 * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler,
 * in which the heavy-hitter bucket is served with less weight.
 * In other words, non-heavy-hitters (e.g., short bursts of critical traffic)
 * are isolated from heavy-hitters and served with higher priority.
 *
 * Heavy-hitters are detected with a multi-stage filter in the spirit of
 * "New Directions in Traffic Measurement and Accounting" by Estan and
 * Varghese: the flow hash of each packet indexes one counter in each of
 * HHF_ARRAYS_CNT counter arrays, the counters accumulate packet bytes, and a
 * flow is admitted to the heavy-hitter table only once the minimum of its
 * counters exceeds hhf_admit_bytes within one hhf_reset_timeout interval.
 * Admitted heavy-hitters are aged out of the table after hhf_evict_timeout
 * of inactivity.
 */

#define HH_FLOWS_CNT	 1024  /* number of entries in the heavy-hitter table T */
#define HHF_ARRAYS_CNT	 4     /* number of arrays in multi-stage filter F */
#define HHF_ARRAYS_LEN	 1024  /* number of counters in each array of F */
#define HHF_BIT_MASK_LEN 10    /* masking 10 bits */
#define HHF_BIT_MASK	 0x3FF /* bitmask of 10 bits */

#define WDRR_BUCKET_CNT  2     /* two buckets for Weighted DRR */
enum wdrr_bucket_idx {
	WDRR_BUCKET_FOR_HH	= 0, /* bucket id for heavy-hitters */
	WDRR_BUCKET_FOR_NON_HH	= 1  /* bucket id for non-heavy-hitters */
};

#define hhf_time_before(a, b)	\
	(typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))

/* Heavy-hitter per-flow state */
struct hh_flow_state {
	u32		 hash_id;	/* hash of flow-id (e.g. TCP 5-tuple) */
	u32		 hit_timestamp;	/* last time heavy-hitter was seen */
	struct list_head flowchain;	/* chaining under hash collision */
};

/* Weighted Deficit Round Robin (WDRR) scheduler */
struct wdrr_bucket {
	struct sk_buff	  *head;
	struct sk_buff	  *tail;
	struct list_head  bucketchain;
	int		  deficit;
};

struct hhf_sched_data {
	struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
	siphash_key_t	   perturbation;   /* hash perturbation */
	u32		   quantum;        /* psched_mtu(qdisc_dev(sch)) */
	u32		   drop_overlimit; /* number of times max qdisc packet
					    * limit was hit
					    */
	struct list_head   *hh_flows;       /* table T (currently active HHs) */
	u32		   hh_flows_limit;            /* max active HH allocs */
	u32		   hh_flows_overlimit; /* num of disallowed HH allocs */
	u32		   hh_flows_total_cnt;          /* total admitted HHs */
	u32		   hh_flows_current_cnt;        /* total current HHs  */
	u32		   *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */
	u32		   hhf_arrays_reset_timestamp;  /* last time hhf_arrays
							 * was reset
							 */
	unsigned long	   *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits
							     * of hhf_arrays
							     */
	/* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */
	struct list_head   new_buckets; /* list of new buckets */
	struct list_head   old_buckets; /* list of old buckets */

	/* Configurable HHF parameters */
	u32		   hhf_reset_timeout; /* interval to reset counter
					       * arrays in filter F
					       * (default 40ms)
					       */
	u32		   hhf_admit_bytes;   /* counter thresh to classify as
					       * HH (default 128KB).
					       * With these default values,
					       * 128KB / 40ms = 25 Mbps,
					       * i.e., we expect to capture HHs
					       * sending > 25 Mbps.
					       */
	u32		   hhf_evict_timeout; /* aging threshold to evict idle
					       * HHs out of table T. This should
					       * be large enough to avoid
					       * reordering during HH eviction.
					       * (default 1s)
					       */
	u32		   hhf_non_hh_weight; /* WDRR weight for non-HHs
					       * (default 2,
					       *  i.e., non-HH : HH = 2 : 1)
					       */
};

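/* Timestamps are taken in jiffies and compared with the wrap-safe
 * hhf_time_before() macro above.
 */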
static u32 hhf_time_stamp(void)
{
	return jiffies;
}

/* Looks up a heavy-hitter flow in a chaining list of table T. */
static struct hh_flow_state *seek_list(const u32 hash,
				       struct list_head *head,
				       struct hhf_sched_data *q)
{
	struct hh_flow_state *flow, *next;
	u32 now = hhf_time_stamp();

	if (list_empty(head))
		return NULL;

	list_for_each_entry_safe(flow, next, head, flowchain) {
		u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

		if (hhf_time_before(prev, now)) {
			/* Delete expired heavy-hitters, but preserve one entry
			 * to avoid kzalloc() when this slot is hit next time.
			 */
			if (list_is_last(&flow->flowchain, head))
				return NULL;
			list_del(&flow->flowchain);
			kfree(flow);
			q->hh_flows_current_cnt--;
		} else if (flow->hash_id == hash) {
			return flow;
		}
	}
	return NULL;
}

/* Returns a flow state entry for a new heavy-hitter.  Either reuses an
 * expired entry or dynamically allocates a new entry.
 */
static struct hh_flow_state *alloc_new_hh(struct list_head *head,
					  struct hhf_sched_data *q)
{
	struct hh_flow_state *flow;
	u32 now = hhf_time_stamp();

	if (!list_empty(head)) {
		/* Find an expired heavy-hitter flow entry. */
		list_for_each_entry(flow, head, flowchain) {
			u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

			if (hhf_time_before(prev, now))
				return flow;
		}
	}

	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
		q->hh_flows_overlimit++;
		return NULL;
	}
	/* Create a new entry. */
	flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
	if (!flow)
		return NULL;

	q->hh_flows_current_cnt++;
	INIT_LIST_HEAD(&flow->flowchain);
	list_add_tail(&flow->flowchain, head);

	return flow;
}

/* Assigns packets to WDRR buckets.  Implements the multi-stage filter to
 * classify heavy-hitters.
 */
static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	u32 tmp_hash, hash;
	u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
	struct hh_flow_state *flow;
	u32 pkt_len, min_hhf_val;
	int i;
	u32 prev;
	u32 now = hhf_time_stamp();

	/* Reset the HHF counter arrays if this is the right time. */
	prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
	if (hhf_time_before(prev, now)) {
		for (i = 0; i < HHF_ARRAYS_CNT; i++)
			bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
		q->hhf_arrays_reset_timestamp = now;
	}

	/* Get the hashed flow-id of the skb. */
	hash = skb_get_hash_perturb(skb, &q->perturbation);

	/* Check if this packet belongs to an already established HH flow. */
	flow_pos = hash & HHF_BIT_MASK;
	flow = seek_list(hash, &q->hh_flows[flow_pos], q);
	if (flow) { /* found its HH flow */
		flow->hit_timestamp = now;
		return WDRR_BUCKET_FOR_HH;
	}

	/* Now pass the packet through the multi-stage filter. */
	tmp_hash = hash;
	xorsum = 0;
	for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
		/* Split the flow hash into 10-bit chunks. */
		filter_pos[i] = tmp_hash & HHF_BIT_MASK;
		xorsum ^= filter_pos[i];
		tmp_hash >>= HHF_BIT_MASK_LEN;
	}
	/* The last chunk is computed as the XOR of all previous chunks. */
	filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;

	pkt_len = qdisc_pkt_len(skb);
	min_hhf_val = ~0U;
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		u32 val;

		if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
			q->hhf_arrays[i][filter_pos[i]] = 0;
			__set_bit(filter_pos[i], q->hhf_valid_bits[i]);
		}

		val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
		if (min_hhf_val > val)
			min_hhf_val = val;
	}

	/* Found a new HH iff all counter values > HH admit threshold. */
	if (min_hhf_val > q->hhf_admit_bytes) {
		/* Just captured a new heavy-hitter. */
		flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
		if (!flow) /* memory alloc problem */
			return WDRR_BUCKET_FOR_NON_HH;
		flow->hash_id = hash;
		flow->hit_timestamp = now;
		q->hh_flows_total_cnt++;

		/* By returning without updating counters in q->hhf_arrays,
		 * we implicitly implement "shielding": once a flow is caught,
		 * its subsequent packets no longer inflate the filter.
		 */
		return WDRR_BUCKET_FOR_HH;
	}

	/* Conservative update of the HHF arrays: only raise counters up to
	 * the minimum estimate for this flow.
	 */
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
			q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
	}
	return WDRR_BUCKET_FOR_NON_HH;
}

/* Removes one skb from the head of a bucket. */
static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
{
	struct sk_buff *skb = bucket->head;

	bucket->head = skb->next;
	skb_mark_not_on_list(skb);
	return skb;
}

/* Tail-adds skb to a bucket. */
static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
{
	if (bucket->head == NULL)
		bucket->head = skb;
	else
		bucket->tail->next = skb;
	bucket->tail = skb;
	skb->next = NULL;
}

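/* Called by hhf_enqueue() when the queue length exceeds sch->limit:
 * drops one queued packet to make room for the new arrival.
 */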
static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct wdrr_bucket *bucket;

	/* Always try to drop from heavy-hitters first. */
	bucket = &q->buckets[WDRR_BUCKET_FOR_HH];
	if (!bucket->head)
		bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH];

	if (bucket->head) {
		struct sk_buff *skb = dequeue_head(bucket);

		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, to_free);
	}

	/* Return the id of the bucket the packet was dropped from. */
	return bucket - q->buckets;
}

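/* Classifies the packet into one of the two WDRR buckets and tail-adds it.
 * If the qdisc exceeds its packet limit, one packet is dropped; congestion
 * notification is returned only when the drop hit the same bucket.
 */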
static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	enum wdrr_bucket_idx idx;
	struct wdrr_bucket *bucket;
	unsigned int prev_backlog;

	idx = hhf_classify(skb, sch);

	bucket = &q->buckets[idx];
	bucket_add(bucket, skb);
	qdisc_qstats_backlog_inc(sch, skb);

	if (list_empty(&bucket->bucketchain)) {
		unsigned int weight;

		/* The logic of new_buckets vs. old_buckets is the same as
		 * new_flows vs. old_flows in fq_codel, i.e., short bursts of
		 * non-heavy-hitters are given priority.
		 */
		if (idx == WDRR_BUCKET_FOR_HH) {
			/* Always move heavy-hitters to the old bucket list. */
			weight = 1;
			list_add_tail(&bucket->bucketchain, &q->old_buckets);
		} else {
			weight = q->hhf_non_hh_weight;
			list_add_tail(&bucket->bucketchain, &q->new_buckets);
		}
		bucket->deficit = weight * q->quantum;
	}
	if (++sch->q.qlen <= sch->limit)
		return NET_XMIT_SUCCESS;

	prev_backlog = sch->qstats.backlog;
	q->drop_overlimit++;
	/* Return Congestion Notification only if we dropped a packet from
	 * this bucket.
	 */
	if (hhf_drop(sch, to_free) == idx)
		return NET_XMIT_CN;

	/* As we dropped a packet, better let the upper stack know this. */
	qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
	return NET_XMIT_SUCCESS;
}

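/* Weighted DRR dequeue: serve new (non-heavy-hitter) buckets before old
 * buckets, refilling a bucket's deficit by weight * quantum when it runs out.
 */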
static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct wdrr_bucket *bucket;
	struct list_head *head;

begin:
	head = &q->new_buckets;
	if (list_empty(head)) {
		head = &q->old_buckets;
		if (list_empty(head))
			return NULL;
	}
	bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);

	if (bucket->deficit <= 0) {
		int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
			      1 : q->hhf_non_hh_weight;

		bucket->deficit += weight * q->quantum;
		list_move_tail(&bucket->bucketchain, &q->old_buckets);
		goto begin;
	}

	if (bucket->head) {
		skb = dequeue_head(bucket);
		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
	}

	if (!skb) {
		/* Force a pass through old_buckets to prevent starvation. */
		if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
			list_move_tail(&bucket->bucketchain, &q->old_buckets);
		else
			list_del_init(&bucket->bucketchain);
		goto begin;
	}
	qdisc_bstats_update(sch, skb);
	bucket->deficit -= qdisc_pkt_len(skb);

	return skb;
}

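/* Drains the qdisc by dequeuing and freeing every remaining packet. */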
static void hhf_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = hhf_dequeue(sch)) != NULL)
		rtnl_kfree_skbs(skb, skb);
}

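/* Frees the filter arrays, their valid-bit bitmaps and any remaining entries
 * in the heavy-hitter flow table.  Also reached when hhf_init() fails.
 */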
static void hhf_destroy(struct Qdisc *sch)
{
	int i;
	struct hhf_sched_data *q = qdisc_priv(sch);

	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		kvfree(q->hhf_arrays[i]);
		kvfree(q->hhf_valid_bits[i]);
	}

	if (!q->hh_flows)
		return;

	for (i = 0; i < HH_FLOWS_CNT; i++) {
		struct hh_flow_state *flow, *next;
		struct list_head *head = &q->hh_flows[i];

		if (list_empty(head))
			continue;
		list_for_each_entry_safe(flow, next, head, flowchain) {
			list_del(&flow->flowchain);
			kfree(flow);
		}
	}
	kvfree(q->hh_flows);
}

static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
	[TCA_HHF_BACKLOG_LIMIT]	 = { .type = NLA_U32 },
	[TCA_HHF_QUANTUM]	 = { .type = NLA_U32 },
	[TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
	[TCA_HHF_RESET_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_ADMIT_BYTES]	 = { .type = NLA_U32 },
	[TCA_HHF_EVICT_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_NON_HH_WEIGHT]	 = { .type = NLA_U32 },
};

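/* Applies netlink parameters under the qdisc tree lock.  Quantum and non-HH
 * weight are validated together so their product stays within INT_MAX, and
 * excess packets are dropped if the new backlog limit is smaller.
 */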
static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HHF_MAX + 1];
	unsigned int qlen, prev_backlog;
	int err;
	u64 non_hh_quantum;
	u32 new_quantum = q->quantum;
	u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_HHF_QUANTUM])
		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);

	if (tb[TCA_HHF_NON_HH_WEIGHT])
		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);

	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
	if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX)
		return -EINVAL;

	sch_tree_lock(sch);

	if (tb[TCA_HHF_BACKLOG_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);

	q->quantum = new_quantum;
	q->hhf_non_hh_weight = new_hhf_non_hh_weight;

	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);

	if (tb[TCA_HHF_RESET_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);

		q->hhf_reset_timeout = usecs_to_jiffies(us);
	}

	if (tb[TCA_HHF_ADMIT_BYTES])
		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);

	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);

		q->hhf_evict_timeout = usecs_to_jiffies(us);
	}

	qlen = sch->q.qlen;
	prev_backlog = sch->qstats.backlog;
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = hhf_dequeue(sch);

		rtnl_kfree_skbs(skb, skb);
	}
	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
				  prev_backlog - sch->qstats.backlog);

	sch_tree_unlock(sch);
	return 0;
}

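/* Sets the defaults (1000-packet limit, one-MTU quantum, 40ms filter reset,
 * 128KB admit threshold, 1s eviction, non-HH weight 2), applies any netlink
 * options, then allocates the flow table and filter arrays.
 */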
static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	int i;

	sch->limit = 1000;
	q->quantum = psched_mtu(qdisc_dev(sch));
	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
	INIT_LIST_HEAD(&q->new_buckets);
	INIT_LIST_HEAD(&q->old_buckets);

	/* Configurable HHF parameters */
	q->hhf_reset_timeout = HZ / 25; /* 40  ms */
	q->hhf_admit_bytes = 131072;    /* 128 KB */
	q->hhf_evict_timeout = HZ;      /* 1  sec */
	q->hhf_non_hh_weight = 2;

	if (opt) {
		int err = hhf_change(sch, opt, extack);

		if (err)
			return err;
	}

	if (!q->hh_flows) {
		/* Initialize the heavy-hitter flow table. */
		q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head),
				       GFP_KERNEL);
		if (!q->hh_flows)
			return -ENOMEM;
		for (i = 0; i < HH_FLOWS_CNT; i++)
			INIT_LIST_HEAD(&q->hh_flows[i]);

		/* Cap max active HHs at twice the length of the hh_flows table. */
		q->hh_flows_limit = 2 * HH_FLOWS_CNT;
		q->hh_flows_overlimit = 0;
		q->hh_flows_total_cnt = 0;
		q->hh_flows_current_cnt = 0;

		/* Initialize the heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN,
						    sizeof(u32),
						    GFP_KERNEL);
			if (!q->hhf_arrays[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}
		q->hhf_arrays_reset_timestamp = hhf_time_stamp();

		/* Initialize the valid bits of the heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN /
							BITS_PER_BYTE, GFP_KERNEL);
			if (!q->hhf_valid_bits[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}

		/* Initialize the Weighted DRR buckets. */
		for (i = 0; i < WDRR_BUCKET_CNT; i++) {
			struct wdrr_bucket *bucket = q->buckets + i;

			INIT_LIST_HEAD(&bucket->bucketchain);
		}
	}

	return 0;
}

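/* Dumps the current configuration to userspace, converting the jiffies-based
 * timeouts back to microseconds.
 */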
static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
			jiffies_to_usecs(q->hhf_reset_timeout)) ||
	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
			jiffies_to_usecs(q->hhf_evict_timeout)) ||
	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	return -1;
}

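/* Exports the HHF-specific counters (struct tc_hhf_xstats) to userspace. */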
static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct tc_hhf_xstats st = {
		.drop_overlimit	= q->drop_overlimit,
		.hh_overlimit	= q->hh_flows_overlimit,
		.hh_tot_count	= q->hh_flows_total_cnt,
		.hh_cur_count	= q->hh_flows_current_cnt,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
	.id		= "hhf",
	.priv_size	= sizeof(struct hhf_sched_data),

	.enqueue	= hhf_enqueue,
	.dequeue	= hhf_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= hhf_init,
	.reset		= hhf_reset,
	.destroy	= hhf_destroy,
	.change		= hhf_change,
	.dump		= hhf_dump,
	.dump_stats	= hhf_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init hhf_module_init(void)
{
	return register_qdisc(&hhf_qdisc_ops);
}

static void __exit hhf_module_exit(void)
{
	unregister_qdisc(&hhf_qdisc_ops);
}

module_init(hhf_module_init)
module_exit(hhf_module_exit)
MODULE_AUTHOR("Terry Lam");
MODULE_AUTHOR("Nandita Dukkipati");
MODULE_LICENSE("GPL");