/* net/sched/sch_hhf.c	Heavy-Hitter Filter (HHF)
 *
 * Copyright (C) 2013 Terry Lam
 * Copyright (C) 2013 Nandita Dukkipati
 */
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

/* Heavy-Hitter Filter (HHF) queueing discipline.
 *
 * Packets are classified into two Weighted Deficit Round Robin (WDRR)
 * buckets: one for heavy-hitter flows and one for everything else.
 *
 * A heavy-hitter is detected with a multi-stage filter: HHF_ARRAYS_CNT
 * byte-counter arrays are indexed by disjoint chunks of the packet's
 * flow hash, and a flow is promoted to the heavy-hitter bucket once the
 * minimum of its counters exceeds hhf_admit_bytes.  The counter arrays
 * are reset every hhf_reset_timeout, so the admission threshold
 * effectively applies per time window.
 *
 * Flows already identified as heavy-hitters are remembered in the
 * hh_flows hash table and keep being steered to the heavy-hitter
 * bucket; an entry is evicted after hhf_evict_timeout without traffic.
 *
 * The WDRR scheduler serves the non-heavy-hitter bucket with weight
 * hhf_non_hh_weight and the heavy-hitter bucket with weight 1, so that
 * small and low-rate flows see lower queueing delay than bulk flows.
 */
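
/* Illustrative user-space configuration (a sketch, not taken from this
 * file: it assumes the iproute2 "tc" front end exposes keywords mapping
 * onto the TCA_HHF_* attributes handled below, and uses a hypothetical
 * interface name):
 *
 *   tc qdisc add dev eth0 root hhf limit 1000 quantum 1514 \
 *	hh_limit 2048 reset_timeout 40ms admit_bytes 131072 \
 *	evict_timeout 1s non_hh_weight 2
 *
 * The values shown match the defaults set up in hhf_init() below.
 */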

#define HH_FLOWS_CNT	 1024  /* number of chains in the HH flow table */
#define HHF_ARRAYS_CNT	 4     /* number of arrays in the multi-stage filter */
#define HHF_ARRAYS_LEN	 1024  /* number of counters in each filter array */
#define HHF_BIT_MASK_LEN 10    /* masking 10 bits per filter array index */
#define HHF_BIT_MASK	 0x3FF /* bitmask of 10 bits */

#define WDRR_BUCKET_CNT  2     /* two buckets for Weighted DRR */
enum wdrr_bucket_idx {
	WDRR_BUCKET_FOR_HH	= 0, /* bucket id for heavy-hitters */
	WDRR_BUCKET_FOR_NON_HH	= 1  /* bucket id for non-heavy-hitters */
};

/* Returns true if time @a is before time @b; correct across u32
 * (jiffies) wraparound.
 */
#define hhf_time_before(a, b)	\
	(typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))
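
/* A minimal worked example of the wrap-safe comparison above, with
 * illustrative values: hhf_time_before(0xfffffff0, 0x00000010) is true
 * because (s32)(0xfffffff0 - 0x00000010) == (s32)0xffffffe0 < 0, so the
 * ordering stays correct even after the u32 jiffies counter wraps.
 */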

/* Per-flow state of an identified heavy-hitter. */
struct hh_flow_state {
	u32		 hash_id;
	u32		 hit_timestamp;
	struct list_head flowchain;
};

/* Weighted Deficit Round Robin (WDRR) scheduler bucket. */
struct wdrr_bucket {
	struct sk_buff	 *head;
	struct sk_buff	 *tail;
	struct list_head bucketchain;
	int		 deficit;
};

struct hhf_sched_data {
	struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
	u32		   perturbation;   /* hash perturbation */
	u32		   quantum;        /* psched_mtu(qdisc_dev(sch)) */
	u32		   drop_overlimit; /* number of times max qdisc
					    * packet limit was hit
					    */

	/* Table of currently active heavy-hitter flows. */
	struct list_head   *hh_flows;
	u32		   hh_flows_limit;       /* max active HH entries */
	u32		   hh_flows_overlimit;   /* disallowed HH allocations */
	u32		   hh_flows_total_cnt;   /* total admitted HHs */
	u32		   hh_flows_current_cnt; /* currently tracked HHs */

	/* Multi-stage filter: per-array byte counters indexed by chunks
	 * of the flow hash.
	 */
	u32		   *hhf_arrays[HHF_ARRAYS_CNT];
	u32		   hhf_arrays_reset_timestamp; /* last counter reset */

	/* Shadow valid bits of hhf_arrays; clearing them is how the
	 * counter arrays are "reset" without zeroing the arrays.
	 */
	unsigned long	   *hhf_valid_bits[HHF_ARRAYS_CNT];

	/* WDRR bucket lists, analogous to new_flows/old_flows in fq_codel. */
	struct list_head   new_buckets;
	struct list_head   old_buckets;

	/* Configurable HHF parameters */
	u32		   hhf_reset_timeout; /* interval to reset the counter
					       * arrays of the filter
					       * (default 40ms)
					       */
	u32		   hhf_admit_bytes;   /* counter threshold to classify
					       * a flow as HH (default 128KB).
					       * With the default timeouts this
					       * captures flows sending more
					       * than 128KB / 40ms, i.e.
					       * roughly 25 Mbit/s.
					       */
	u32		   hhf_evict_timeout; /* idle time after which a HH
					       * entry is evicted from the
					       * flow table (default 1s)
					       */
	u32		   hhf_non_hh_weight; /* WDRR weight for non-HHs
					       * (default 2, i.e.
					       * non-HH : HH = 2 : 1)
					       */
};

static u32 hhf_time_stamp(void)
{
	return jiffies;
}

/* Looks up a heavy-hitter flow in a chain of the hh_flows table. */
static struct hh_flow_state *seek_list(const u32 hash,
				       struct list_head *head,
				       struct hhf_sched_data *q)
{
	struct hh_flow_state *flow, *next;
	u32 now = hhf_time_stamp();

	if (list_empty(head))
		return NULL;

	list_for_each_entry_safe(flow, next, head, flowchain) {
		u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

		if (hhf_time_before(prev, now)) {
			/* Delete expired heavy-hitters, but preserve one
			 * entry to avoid kzalloc() when this slot is hit
			 * again.
			 */
			if (list_is_last(&flow->flowchain, head))
				return NULL;
			list_del(&flow->flowchain);
			kfree(flow);
			q->hh_flows_current_cnt--;
		} else if (flow->hash_id == hash) {
			return flow;
		}
	}
	return NULL;
}

/* Returns a flow state entry for a new heavy-hitter.  Either reuses an
 * expired entry or dynamically allocates a new entry.
 */
static struct hh_flow_state *alloc_new_hh(struct list_head *head,
					  struct hhf_sched_data *q)
{
	struct hh_flow_state *flow;
	u32 now = hhf_time_stamp();

	if (!list_empty(head)) {
		/* Find an expired heavy-hitter flow entry. */
		list_for_each_entry(flow, head, flowchain) {
			u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

			if (hhf_time_before(prev, now))
				return flow;
		}
	}

	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
		q->hh_flows_overlimit++;
		return NULL;
	}
	/* Create a new entry. */
	flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
	if (!flow)
		return NULL;

	q->hh_flows_current_cnt++;
	INIT_LIST_HEAD(&flow->flowchain);
	list_add_tail(&flow->flowchain, head);

	return flow;
}

/* Assigns a packet to one of the two WDRR buckets, implementing a
 * multi-stage filter to detect heavy-hitters.
 */
static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	u32 tmp_hash, hash;
	u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
	struct hh_flow_state *flow;
	u32 pkt_len, min_hhf_val;
	int i;
	u32 prev;
	u32 now = hhf_time_stamp();

	/* Reset the HHF counter arrays if this is the right time. */
	prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
	if (hhf_time_before(prev, now)) {
		for (i = 0; i < HHF_ARRAYS_CNT; i++)
			bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
		q->hhf_arrays_reset_timestamp = now;
	}

	/* Get hashed flow-id of the skb. */
	hash = skb_get_hash_perturb(skb, q->perturbation);

	/* Check if this packet belongs to an already established HH flow. */
	flow_pos = hash & HHF_BIT_MASK;
	flow = seek_list(hash, &q->hh_flows[flow_pos], q);
	if (flow) { /* found its HH flow */
		flow->hit_timestamp = now;
		return WDRR_BUCKET_FOR_HH;
	}

	/* Now pass the packet through the multi-stage filter. */
	tmp_hash = hash;
	xorsum = 0;
	for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
		/* Split the flow hash into 10-bit chunks. */
		filter_pos[i] = tmp_hash & HHF_BIT_MASK;
		xorsum ^= filter_pos[i];
		tmp_hash >>= HHF_BIT_MASK_LEN;
	}
	/* The last chunk is computed as XOR of the previous chunks. */
	filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;

	pkt_len = qdisc_pkt_len(skb);
	min_hhf_val = ~0U;
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		u32 val;

		if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
			q->hhf_arrays[i][filter_pos[i]] = 0;
			__set_bit(filter_pos[i], q->hhf_valid_bits[i]);
		}

		val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
		if (min_hhf_val > val)
			min_hhf_val = val;
	}

	/* Found a new HH iff all counter values > HH admit threshold. */
	if (min_hhf_val > q->hhf_admit_bytes) {
		/* Just captured a new heavy-hitter. */
		flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
		if (!flow) /* memory alloc problem */
			return WDRR_BUCKET_FOR_NON_HH;
		flow->hash_id = hash;
		flow->hit_timestamp = now;
		q->hh_flows_total_cnt++;

		/* By returning without updating the counters, further bytes
		 * of this flow no longer inflate the shared filter arrays.
		 */
		return WDRR_BUCKET_FOR_HH;
	}

	/* Conservative update of the HHF counter arrays. */
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
			q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
	}
	return WDRR_BUCKET_FOR_NON_HH;
}

/* Removes one skb from head of bucket. */
static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
{
	struct sk_buff *skb = bucket->head;

	bucket->head = skb->next;
	skb->next = NULL;
	return skb;
}

/* Tail-adds skb to bucket. */
static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
{
	if (bucket->head == NULL)
		bucket->head = skb;
	else
		bucket->tail->next = skb;
	bucket->tail = skb;
	skb->next = NULL;
}

static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct wdrr_bucket *bucket;

	/* Always try to drop from heavy-hitters first. */
	bucket = &q->buckets[WDRR_BUCKET_FOR_HH];
	if (!bucket->head)
		bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH];

	if (bucket->head) {
		struct sk_buff *skb = dequeue_head(bucket);

		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, to_free);
	}

	/* Return id of the bucket from which the packet was dropped. */
	return bucket - q->buckets;
}

static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	enum wdrr_bucket_idx idx;
	struct wdrr_bucket *bucket;
	unsigned int prev_backlog;

	idx = hhf_classify(skb, sch);

	bucket = &q->buckets[idx];
	bucket_add(bucket, skb);
	qdisc_qstats_backlog_inc(sch, skb);

	if (list_empty(&bucket->bucketchain)) {
		unsigned int weight;

		/* The logic of new_buckets vs. old_buckets is the same as
		 * new_flows vs. old_flows in fq_codel: short bursts of
		 * non-heavy-hitters get strict priority.
		 */
		if (idx == WDRR_BUCKET_FOR_HH) {
			/* Always move heavy-hitters to the old bucket list. */
			weight = 1;
			list_add_tail(&bucket->bucketchain, &q->old_buckets);
		} else {
			weight = q->hhf_non_hh_weight;
			list_add_tail(&bucket->bucketchain, &q->new_buckets);
		}
		bucket->deficit = weight * q->quantum;
	}
	if (++sch->q.qlen <= sch->limit)
		return NET_XMIT_SUCCESS;

	prev_backlog = sch->qstats.backlog;
	q->drop_overlimit++;
	/* Return Congestion Notification only if we dropped a packet from
	 * this bucket.
	 */
	if (hhf_drop(sch, to_free) == idx)
		return NET_XMIT_CN;

	/* As we dropped a packet, better let upper stack know this. */
	qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct wdrr_bucket *bucket;
	struct list_head *head;

begin:
	head = &q->new_buckets;
	if (list_empty(head)) {
		head = &q->old_buckets;
		if (list_empty(head))
			return NULL;
	}
	bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);

	if (bucket->deficit <= 0) {
		int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
			      1 : q->hhf_non_hh_weight;

		bucket->deficit += weight * q->quantum;
		list_move_tail(&bucket->bucketchain, &q->old_buckets);
		goto begin;
	}

	if (bucket->head) {
		skb = dequeue_head(bucket);
		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
	}

	if (!skb) {
		/* Force a pass through old_buckets to prevent starvation. */
		if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
			list_move_tail(&bucket->bucketchain, &q->old_buckets);
		else
			list_del_init(&bucket->bucketchain);
		goto begin;
	}
	qdisc_bstats_update(sch, skb);
	bucket->deficit -= qdisc_pkt_len(skb);

	return skb;
}
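
/* A worked example of the WDRR weighting above, using illustrative
 * numbers rather than anything taken from this file: with the default
 * non_hh_weight of 2 and an Ethernet quantum of roughly 1514 bytes, a
 * backlogged non-HH bucket may send up to 2 * 1514 bytes per round
 * while the HH bucket sends up to 1514 bytes, so non-heavy-hitters
 * receive about two thirds of the link when both buckets stay busy.
 */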

static void hhf_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = hhf_dequeue(sch)) != NULL)
		rtnl_kfree_skbs(skb, skb);
}

static void hhf_destroy(struct Qdisc *sch)
{
	int i;
	struct hhf_sched_data *q = qdisc_priv(sch);

	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		kvfree(q->hhf_arrays[i]);
		kvfree(q->hhf_valid_bits[i]);
	}

	if (!q->hh_flows)
		return;

	for (i = 0; i < HH_FLOWS_CNT; i++) {
		struct hh_flow_state *flow, *next;
		struct list_head *head = &q->hh_flows[i];

		if (list_empty(head))
			continue;
		list_for_each_entry_safe(flow, next, head, flowchain) {
			list_del(&flow->flowchain);
			kfree(flow);
		}
	}
	kvfree(q->hh_flows);
}

static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
	[TCA_HHF_BACKLOG_LIMIT]	 = { .type = NLA_U32 },
	[TCA_HHF_QUANTUM]	 = { .type = NLA_U32 },
	[TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
	[TCA_HHF_RESET_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_ADMIT_BYTES]	 = { .type = NLA_U32 },
	[TCA_HHF_EVICT_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_NON_HH_WEIGHT]	 = { .type = NLA_U32 },
};

static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HHF_MAX + 1];
	unsigned int qlen, prev_backlog;
	int err;
	u64 non_hh_quantum;
	u32 new_quantum = q->quantum;
	u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_HHF_MAX, opt, hhf_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_HHF_QUANTUM])
		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);

	if (tb[TCA_HHF_NON_HH_WEIGHT])
		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);

	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
	if (non_hh_quantum > INT_MAX)
		return -EINVAL;

	sch_tree_lock(sch);

	if (tb[TCA_HHF_BACKLOG_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);

	q->quantum = new_quantum;
	q->hhf_non_hh_weight = new_hhf_non_hh_weight;

	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);

	if (tb[TCA_HHF_RESET_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);

		q->hhf_reset_timeout = usecs_to_jiffies(us);
	}

	if (tb[TCA_HHF_ADMIT_BYTES])
		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);

	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);

		q->hhf_evict_timeout = usecs_to_jiffies(us);
	}

	qlen = sch->q.qlen;
	prev_backlog = sch->qstats.backlog;
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = hhf_dequeue(sch);

		rtnl_kfree_skbs(skb, skb);
	}
	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
				  prev_backlog - sch->qstats.backlog);

	sch_tree_unlock(sch);
	return 0;
}

static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	int i;

	sch->limit = 1000;
	q->quantum = psched_mtu(qdisc_dev(sch));
	q->perturbation = prandom_u32();
	INIT_LIST_HEAD(&q->new_buckets);
	INIT_LIST_HEAD(&q->old_buckets);

	/* Configurable HHF parameters */
	q->hhf_reset_timeout = HZ / 25; /* 40  ms */
	q->hhf_admit_bytes = 131072;    /* 128 KB */
	q->hhf_evict_timeout = HZ;      /* 1  sec */
	q->hhf_non_hh_weight = 2;

	if (opt) {
		int err = hhf_change(sch, opt);

		if (err)
			return err;
	}

	if (!q->hh_flows) {
		/* Initialize heavy-hitter flow table. */
		q->hh_flows = kvzalloc(HH_FLOWS_CNT *
				       sizeof(struct list_head), GFP_KERNEL);
		if (!q->hh_flows)
			return -ENOMEM;
		for (i = 0; i < HH_FLOWS_CNT; i++)
			INIT_LIST_HEAD(&q->hh_flows[i]);

		/* Cap max active HHs at twice len of hh_flows table. */
		q->hh_flows_limit = 2 * HH_FLOWS_CNT;
		q->hh_flows_overlimit = 0;
		q->hh_flows_total_cnt = 0;
		q->hh_flows_current_cnt = 0;

		/* Initialize heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN *
						    sizeof(u32), GFP_KERNEL);
			if (!q->hhf_arrays[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}
		q->hhf_arrays_reset_timestamp = hhf_time_stamp();

		/* Initialize valid bits of heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN /
							BITS_PER_BYTE, GFP_KERNEL);
			if (!q->hhf_valid_bits[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}

		/* Initialize WDRR buckets. */
		for (i = 0; i < WDRR_BUCKET_CNT; i++) {
			struct wdrr_bucket *bucket = q->buckets + i;

			INIT_LIST_HEAD(&bucket->bucketchain);
		}
	}

	return 0;
}

static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
			jiffies_to_usecs(q->hhf_reset_timeout)) ||
	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
			jiffies_to_usecs(q->hhf_evict_timeout)) ||
	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	return -1;
}

static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct tc_hhf_xstats st = {
		.drop_overlimit = q->drop_overlimit,
		.hh_overlimit	= q->hh_flows_overlimit,
		.hh_tot_count	= q->hh_flows_total_cnt,
		.hh_cur_count	= q->hh_flows_current_cnt,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
	.id		= "hhf",
	.priv_size	= sizeof(struct hhf_sched_data),

	.enqueue	= hhf_enqueue,
	.dequeue	= hhf_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= hhf_init,
	.reset		= hhf_reset,
	.destroy	= hhf_destroy,
	.change		= hhf_change,
	.dump		= hhf_dump,
	.dump_stats	= hhf_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init hhf_module_init(void)
{
	return register_qdisc(&hhf_qdisc_ops);
}

static void __exit hhf_module_exit(void)
{
	unregister_qdisc(&hhf_qdisc_ops);
}

module_init(hhf_module_init)
module_exit(hhf_module_exit)
MODULE_AUTHOR("Terry Lam");
MODULE_AUTHOR("Nandita Dukkipati");
MODULE_LICENSE("GPL");