1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/errno.h>
20#include <linux/skbuff.h>
21#include <linux/rtnetlink.h>
22
23#include <net/netlink.h>
24#include <net/pkt_sched.h>
25
26#define VERSION "1.2"
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* Per-qdisc private state for the network emulator. */
struct netem_sched_data {
	struct Qdisc	*qdisc;		/* inner child qdisc (tfifo by default) holding delayed skbs */
	struct qdisc_watchdog watchdog;	/* timer to wake us when the head packet's send time arrives */

	psched_tdiff_t latency;		/* base delay added to every packet */
	psched_tdiff_t jitter;		/* random variation around the base delay */

	u32 loss;			/* drop threshold vs. 32-bit random (0 = never, ~0 = always) */
	u32 limit;			/* configured queue length limit (pushed to child fifo) */
	u32 counter;			/* packets enqueued since last forced reorder */
	u32 gap;			/* reorder one packet every 'gap' packets (0 = off) */
	u32 duplicate;			/* duplication threshold vs. 32-bit random */
	u32 reorder;			/* reorder threshold vs. 32-bit random */
	u32 corrupt;			/* corruption threshold vs. 32-bit random */

	/* Correlated pseudo-random state, one instance per random process. */
	struct crndstate {
		u32 last;		/* previous output, blended into the next (see get_crandom) */
		u32 rho;		/* correlation coefficient, fixed-point over 2^32 */
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	/* Optional user-supplied delay distribution (NULL = uniform). */
	struct disttable {
		u32  size;		/* number of entries in table[] */
		s16 table[0];		/* inverse-CDF samples, scaled by NETEM_DIST_SCALE */
	} *delay_dist;
};
76
77
/* Time stamp put into socket buffer control block. */
struct netem_skb_cb {
	psched_time_t	time_to_send;	/* absolute time this skb may be dequeued */
};
81
/* Return netem's private cb area, which lives after the generic qdisc cb. */
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* Compile-time check that both cb structures still fit in skb->cb. */
	BUILD_BUG_ON(sizeof(skb->cb) <
		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
88
89
90
91
/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}
97
98
99
100
101
/* get_crandom - correlated random number generator
 * Next random value depends on the last: the larger rho, the stronger
 * the correlation.  rho is fixed-point, scaled over 2^32 to avoid
 * floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* uncorrelated: plain uniform random */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	/* Weighted blend of fresh randomness and the previous output,
	 * computed in 64 bits then scaled back down by 2^32. */
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
116
117
118
119
120
/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses a table lookup to approximate the desired
 * distribution, driven by a (possibly correlated) uniform random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* No table supplied: fall back to a uniform distribution in [mu-sigma, mu+sigma). */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	/* Split sigma*t/NETEM_DIST_SCALE into remainder and quotient parts
	 * to avoid overflow, rounding the remainder part to nearest. */
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
147
148
149
150
151
152
153
/*
 * Insert one skb into the qdisc, applying loss, duplication, corruption,
 * delay and reordering as configured.
 * Note: parent qdiscs depend on the return value for accounting.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* cb is not filled yet: skb_unshare() below may replace the skb */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication: 0 => none, ~0 => always */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random loss: 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	if (count == 0) {
		/* Packet lost (and not duplicated): count the drop but report
		 * a bypassed success so the parent does not double-account. */
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * Duplication: re-enqueue a clone at the root of the qdisc tree so it
	 * traverses the full hierarchy again.  Duplication is temporarily
	 * disabled so the clone itself cannot be duplicated recursively.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate;
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Unshare first since we are about to modify the data; if the packet
	 * expects hardware checksumming, compute the checksum in software now
	 * before mangling it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
		    || (skb->ip_summed == CHECKSUM_PARTIAL
			&& skb_checksum_help(skb))) {
			sch->qstats.drops++;
			return NET_XMIT_DROP;
		}

		/* Flip a single random bit within the linear data area. */
		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	cb = netem_skb_cb(skb);
	if (q->gap == 0			/* not doing reordering */
	    || q->counter < q->gap	/* still inside the reordering gap */
	    || q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();
		cb->time_to_send = now + delay;
		++q->counter;
		ret = qdisc_enqueue(skb, q->qdisc);
	} else {
		/*
		 * Reorder: send this packet immediately by placing it at the
		 * head of the child queue with time_to_send = now, then reset
		 * the gap counter.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&q->qdisc->q, skb);
		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
		q->qdisc->qstats.requeues++;
		ret = NET_XMIT_SUCCESS;
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
	} else if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
	}

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}
251
252static unsigned int netem_drop(struct Qdisc* sch)
253{
254 struct netem_sched_data *q = qdisc_priv(sch);
255 unsigned int len = 0;
256
257 if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
258 sch->q.qlen--;
259 sch->qstats.drops++;
260 }
261 return len;
262}
263
/* Dequeue the head packet if its scheduled send time has arrived;
 * otherwise arm the watchdog to fire at that time and return NULL.
 */
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	/* Watchdog already armed; nothing is eligible yet. */
	if (sch->flags & TCQ_F_THROTTLED)
		return NULL;

	skb = q->qdisc->ops->peek(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);
		psched_time_t now = psched_get_time();

		/* If this packet's transmit time is due, dequeue it for real. */
		if (cb->time_to_send <= now) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it came from ingress (mirred redirect), clear the
			 * timestamp so it is stamped fresh on the egress path.
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif
			pr_debug("netem_dequeue: return skb=%p\n", skb);
			sch->q.qlen--;
			return skb;
		}

		/* Head packet not due yet: wake up when it is. */
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	return NULL;
}
301
/* Flush the child qdisc, zero our queue length, and cancel any pending
 * watchdog wakeup.
 */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	qdisc_watchdog_cancel(&q->watchdog);
}
310
311
312
313
314
315static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
316{
317 struct netem_sched_data *q = qdisc_priv(sch);
318 unsigned long n = nla_len(attr)/sizeof(__s16);
319 const __s16 *data = nla_data(attr);
320 spinlock_t *root_lock;
321 struct disttable *d;
322 int i;
323
324 if (n > 65536)
325 return -EINVAL;
326
327 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
328 if (!d)
329 return -ENOMEM;
330
331 d->size = n;
332 for (i = 0; i < n; i++)
333 d->table[i] = data[i];
334
335 root_lock = qdisc_root_sleeping_lock(sch);
336
337 spin_lock_bh(root_lock);
338 kfree(q->delay_dist);
339 q->delay_dist = d;
340 spin_unlock_bh(root_lock);
341 return 0;
342}
343
344static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
345{
346 struct netem_sched_data *q = qdisc_priv(sch);
347 const struct tc_netem_corr *c = nla_data(attr);
348
349 init_crandom(&q->delay_cor, c->delay_corr);
350 init_crandom(&q->loss_cor, c->loss_corr);
351 init_crandom(&q->dup_cor, c->dup_corr);
352}
353
354static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
355{
356 struct netem_sched_data *q = qdisc_priv(sch);
357 const struct tc_netem_reorder *r = nla_data(attr);
358
359 q->reorder = r->probability;
360 init_crandom(&q->reorder_cor, r->correlation);
361}
362
363static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
364{
365 struct netem_sched_data *q = qdisc_priv(sch);
366 const struct tc_netem_corrupt *r = nla_data(attr);
367
368 q->corrupt = r->probability;
369 init_crandom(&q->corrupt_cor, r->correlation);
370}
371
/* Netlink validation policy: minimum payload sizes for nested attributes. */
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
};
377
378static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
379 const struct nla_policy *policy, int len)
380{
381 int nested_len = nla_len(nla) - NLA_ALIGN(len);
382
383 if (nested_len < 0)
384 return -EINVAL;
385 if (nested_len >= nla_attr_size(0))
386 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
387 nested_len, policy);
388 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
389 return 0;
390}
391
392
/* Parse a netlink message to (re)configure the qdisc.  The fixed
 * tc_netem_qopt header comes first, optionally followed by nested
 * attributes (correlation, delay distribution, reorder, corrupt).
 *
 * NOTE(review): options are applied incrementally, so a failure partway
 * through (e.g. in get_dist_table) leaves the earlier settings in effect.
 */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* Push the queue limit down into the child fifo. */
	ret = fifo_set_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* For compatibility with earlier versions: if gap is set without an
	 * explicit reorder probability, assume 100% (every gap-th packet is
	 * reordered, since ~0 can never be beaten by get_crandom()). */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	return 0;
}
445
446
447
448
449
/* Private state for the time-ordered fifo (tfifo) used as netem's child. */
struct fifo_sched_data {
	u32 limit;		/* maximum queue length in packets */
	psched_time_t oldest;	/* latest time_to_send currently at the tail */
};
454
/*
 * Special-case FIFO for use by netem: packets are kept sorted by their
 * time_to_send timestamp so the earliest-due packet is always at the head.
 */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for the common case: append at the tail. */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		/* Walk backwards to the last packet due no later than the new
		 * one; the new packet is inserted right after it. */
		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);
		sch->bstats.bytes += qdisc_pkt_len(nskb);
		sch->bstats.packets++;

		return NET_XMIT_SUCCESS;
	}

	/* Queue full: let the standard reshape-fail path handle it. */
	return qdisc_reshape_fail(nskb, sch);
}
487
488static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
489{
490 struct fifo_sched_data *q = qdisc_priv(sch);
491
492 if (opt) {
493 struct tc_fifo_qopt *ctl = nla_data(opt);
494 if (nla_len(opt) < sizeof(*ctl))
495 return -EINVAL;
496
497 q->limit = ctl->limit;
498 } else
499 q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
500
501 q->oldest = PSCHED_PASTPERFECT;
502 return 0;
503}
504
/* Dump the tfifo configuration (just the limit) to a netlink skb.
 * Returns the new message length, or -1 if the attribute did not fit.
 */
static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	/* NLA_PUT jumps to nla_put_failure on overflow. */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}
516
/* Ops for the internal time-ordered fifo; only enqueue is custom, the
 * rest reuse the generic head-based queue helpers. */
static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};
529
/* Create the netem qdisc: set up the watchdog, create the inner tfifo
 * child, then apply the initial configuration.  Options are mandatory.
 */
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
				     &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		/* Configuration failed: tear down the child we just made. */
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}
555
/* Tear down the qdisc: cancel the watchdog first so it cannot fire into
 * a destroyed child, then release the child and the distribution table.
 */
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}
564
/* Dump the full netem configuration: the fixed qopt header followed by
 * nested correlation, reorder and corrupt attributes.  Returns the new
 * message length, or -1 (after trimming) if the message overflowed.
 */
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nla = (struct nlattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	/* Fix up the outer attribute length now that all data is in place. */
	nla->nla_len = skb_tail_pointer(skb) - b;

	return skb->len;

nla_put_failure:
	/* Roll the skb back to where we started before reporting failure. */
	nlmsg_trim(skb, b);
	return -1;
}
604
/* Qdisc operations table for netem. */
static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
619
620
/* Module entry point: announce ourselves and register the qdisc. */
static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
/* Module exit point: unregister the qdisc. */
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
/* Standard module boilerplate. */
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
633