1
2
3
4
5#include <rte_lcore.h>
6#include <rte_cycles.h>
7#include <rte_cpuflags.h>
8#include <rte_malloc.h>
9#include <rte_ethdev.h>
10#include <rte_power_intrinsics.h>
11
12#include "rte_power_pmd_mgmt.h"
13
14#define EMPTYPOLL_MAX 512
15
16
/* Process-wide data shared by all power-management callbacks. */
static struct pmd_conf_data {
	/* which power-saving intrinsics this CPU supports (filled at enable time) */
	struct rte_cpu_intrinsics intrinsics_support;
	/* TSC ticks per microsecond, cached by calc_tsc() */
	uint64_t tsc_per_us;
	/* number of rte_pause() calls per microsecond, calibrated by calc_tsc() */
	uint64_t pause_per_us;
} global_data;
25
26
27
28
/* Per-lcore power-management state. */
enum pmd_mgmt_state {
	/** Device power management is disabled. */
	PMD_MGMT_DISABLED = 0,
	/** Device power management is enabled. */
	PMD_MGMT_ENABLED
};
35
/* Compact (port, queue) identifier - comparable/copyable as one 32-bit value. */
union queue {
	uint32_t val;
	struct {
		uint16_t portid;
		uint16_t qid;
	};
};
43
/* Per-queue bookkeeping, kept in a per-lcore tail queue. */
struct queue_list_entry {
	TAILQ_ENTRY(queue_list_entry) next;
	union queue queue;
	/* consecutive empty polls seen on this queue */
	uint64_t n_empty_polls;
	/* sleep round this queue last qualified for (see pmd_core_cfg) */
	uint64_t n_sleeps;
	/* Rx callback handle returned by rte_eth_add_rx_callback() */
	const struct rte_eth_rxtx_callback *cb;
};
51
/* Per-lcore power-management configuration and sleep-coordination state. */
struct pmd_core_cfg {
	/* list of queues associated with this lcore */
	TAILQ_HEAD(queue_list_head, queue_list_entry) head;
	/* how many queues are in the list? */
	size_t n_queues;
	/* is power management enabled on this lcore? (read by data plane) */
	volatile enum pmd_mgmt_state pwr_mgmt_state;
	/* callback mode for this lcore - same for all of its queues */
	enum rte_power_pmd_mgmt_type cb_mode;
	/* how many queues have reached the current sleep_target round */
	uint64_t n_queues_ready_to_sleep;
	/* current sleep round - bumped each time the whole lcore sleeps */
	uint64_t sleep_target;
} __rte_cache_aligned;
static struct pmd_core_cfg lcore_cfgs[RTE_MAX_LCORE];
67
68static inline bool
69queue_equal(const union queue *l, const union queue *r)
70{
71 return l->val == r->val;
72}
73
74static inline void
75queue_copy(union queue *dst, const union queue *src)
76{
77 dst->val = src->val;
78}
79
80static struct queue_list_entry *
81queue_list_find(const struct pmd_core_cfg *cfg, const union queue *q)
82{
83 struct queue_list_entry *cur;
84
85 TAILQ_FOREACH(cur, &cfg->head, next) {
86 if (queue_equal(&cur->queue, q))
87 return cur;
88 }
89 return NULL;
90}
91
92static int
93queue_list_add(struct pmd_core_cfg *cfg, const union queue *q)
94{
95 struct queue_list_entry *qle;
96
97
98 if (queue_list_find(cfg, q) != NULL)
99 return -EEXIST;
100
101 qle = malloc(sizeof(*qle));
102 if (qle == NULL)
103 return -ENOMEM;
104 memset(qle, 0, sizeof(*qle));
105
106 queue_copy(&qle->queue, q);
107 TAILQ_INSERT_TAIL(&cfg->head, qle, next);
108 cfg->n_queues++;
109
110 return 0;
111}
112
113static struct queue_list_entry *
114queue_list_take(struct pmd_core_cfg *cfg, const union queue *q)
115{
116 struct queue_list_entry *found;
117
118 found = queue_list_find(cfg, q);
119 if (found == NULL)
120 return NULL;
121
122 TAILQ_REMOVE(&cfg->head, found, next);
123 cfg->n_queues--;
124
125
126 return found;
127}
128
129static inline int
130get_monitor_addresses(struct pmd_core_cfg *cfg,
131 struct rte_power_monitor_cond *pmc, size_t len)
132{
133 const struct queue_list_entry *qle;
134 size_t i = 0;
135 int ret;
136
137 TAILQ_FOREACH(qle, &cfg->head, next) {
138 const union queue *q = &qle->queue;
139 struct rte_power_monitor_cond *cur;
140
141
142 if (i >= len) {
143 RTE_LOG(ERR, POWER, "Too many queues being monitored\n");
144 return -1;
145 }
146
147 cur = &pmc[i++];
148 ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
149 if (ret < 0)
150 return ret;
151 }
152 return 0;
153}
154
155static void
156calc_tsc(void)
157{
158 const uint64_t hz = rte_get_timer_hz();
159 const uint64_t tsc_per_us = hz / US_PER_S;
160
161 global_data.tsc_per_us = tsc_per_us;
162
163
164 if (!global_data.intrinsics_support.power_pause) {
165 const uint64_t start = rte_rdtsc_precise();
166 const uint32_t n_pauses = 10000;
167 double us, us_per_pause;
168 uint64_t end;
169 unsigned int i;
170
171
172 for (i = 0; i < n_pauses; i++)
173 rte_pause();
174
175 end = rte_rdtsc_precise();
176 us = (end - start) / (double)tsc_per_us;
177 us_per_pause = us / n_pauses;
178
179 global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause);
180 }
181}
182
183static inline void
184queue_reset(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
185{
186 const bool is_ready_to_sleep = qcfg->n_sleeps == cfg->sleep_target;
187
188
189 qcfg->n_empty_polls = 0;
190
191 qcfg->n_sleeps = 0;
192
193 if (is_ready_to_sleep)
194 cfg->n_queues_ready_to_sleep--;
195
196
197
198
199
200}
201
202static inline bool
203queue_can_sleep(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
204{
205
206 qcfg->n_empty_polls++;
207
208
209 if (qcfg->n_empty_polls <= EMPTYPOLL_MAX)
210 return false;
211
212
213
214
215
216 if (qcfg->n_sleeps == cfg->sleep_target)
217 return true;
218
219
220 qcfg->n_sleeps = cfg->sleep_target;
221 cfg->n_queues_ready_to_sleep++;
222
223 return true;
224}
225
226static inline bool
227lcore_can_sleep(struct pmd_core_cfg *cfg)
228{
229
230 if (cfg->n_queues_ready_to_sleep != cfg->n_queues)
231 return false;
232
233
234 cfg->n_queues_ready_to_sleep = 0;
235 cfg->sleep_target++;
236
237
238
239
240
241 return true;
242}
243
/*
 * Rx callback for MONITOR mode when multi-queue monitoring is supported.
 * Once every queue on this lcore has been empty long enough, arms a
 * monitor across all of them and sleeps until traffic arrives.
 */
static uint16_t
clb_multiwait(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;

	lcore_conf = &lcore_cfgs[lcore];

	if (likely(!empty))
		/* we received traffic - reset this queue's sleep counters */
		queue_reset(lcore_conf, queue_conf);
	else {
		/* VLA: one monitor condition per queue tracked on this lcore */
		struct rte_power_monitor_cond pmc[lcore_conf->n_queues];
		int ret;

		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* are all queues ready to sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* gather monitor addresses for every queue */
		ret = get_monitor_addresses(lcore_conf, pmc,
				lcore_conf->n_queues);
		if (ret < 0)
			return nb_rx;

		/* sleep until any of the monitored addresses is written to */
		rte_power_monitor_multi(pmc, lcore_conf->n_queues, UINT64_MAX);
	}

	return nb_rx;
}
283
284static uint16_t
285clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
286 uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg)
287{
288 struct queue_list_entry *queue_conf = arg;
289
290
291 if (unlikely(nb_rx == 0)) {
292 queue_conf->n_empty_polls++;
293 if (unlikely(queue_conf->n_empty_polls > EMPTYPOLL_MAX)) {
294 struct rte_power_monitor_cond pmc;
295 int ret;
296
297
298 ret = rte_eth_get_monitor_addr(port_id, qidx,
299 &pmc);
300 if (ret == 0)
301 rte_power_monitor(&pmc, UINT64_MAX);
302 }
303 } else
304 queue_conf->n_empty_polls = 0;
305
306 return nb_rx;
307}
308
/*
 * Rx callback for PAUSE mode: once all queues on the lcore have been
 * empty long enough, spends roughly one microsecond pausing - via the
 * power_pause intrinsic when available, otherwise a calibrated loop of
 * rte_pause() calls (see calc_tsc()).
 */
static uint16_t
clb_pause(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;

	lcore_conf = &lcore_cfgs[lcore];

	if (likely(!empty))
		/* we received traffic - reset this queue's sleep counters */
		queue_reset(lcore_conf, queue_conf);
	else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* sleep for ~1 microsecond, use tpause if we have it */
		if (global_data.intrinsics_support.power_pause) {
			const uint64_t cur = rte_rdtsc();
			const uint64_t wait_tsc =
					cur + global_data.tsc_per_us;
			rte_power_pause(wait_tsc);
		} else {
			/* fall back to the calibrated pause loop */
			uint64_t i;
			for (i = 0; i < global_data.pause_per_us; i++)
				rte_pause();
		}
	}

	return nb_rx;
}
348
/*
 * Rx callback for SCALE mode: scales the lcore frequency up as soon as
 * traffic is seen, and down to minimum once all of the lcore's queues
 * have been empty for long enough.
 */
static uint16_t
clb_scale_freq(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	const bool empty = nb_rx == 0;
	struct pmd_core_cfg *lcore_conf = &lcore_cfgs[lcore];
	struct queue_list_entry *queue_conf = arg;

	if (likely(!empty)) {
		/* we received traffic - reset this queue's sleep counters */
		queue_reset(lcore_conf, queue_conf);

		/* scale up freq immediately */
		rte_power_freq_max(rte_lcore_id());
	} else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* all queues idle - scale down freq */
		rte_power_freq_min(rte_lcore_id());
	}

	return nb_rx;
}
379
380static int
381queue_stopped(const uint16_t port_id, const uint16_t queue_id)
382{
383 struct rte_eth_rxq_info qinfo;
384
385 int ret = rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo);
386 if (ret < 0) {
387 if (ret == -ENOTSUP)
388 return 1;
389 else
390 return -1;
391 }
392
393 return qinfo.queue_state == RTE_ETH_QUEUE_STATE_STOPPED;
394}
395
396static int
397cfg_queues_stopped(struct pmd_core_cfg *queue_cfg)
398{
399 const struct queue_list_entry *entry;
400
401 TAILQ_FOREACH(entry, &queue_cfg->head, next) {
402 const union queue *q = &entry->queue;
403 int ret = queue_stopped(q->portid, q->qid);
404 if (ret != 1)
405 return ret;
406 }
407 return 1;
408}
409
/*
 * Verify that frequency scaling can be used on this lcore: one of the
 * ACPI or PSTATE cpufreq environments must be supported and must be the
 * one actually initialized by the power library.
 *
 * @return 0 on success, -ENOTSUP or -EINVAL otherwise.
 */
static int
check_scale(unsigned int lcore)
{
	enum power_management_env env;

	/* only PSTATE and ACPI modes are supported */
	if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ)) {
		RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are supported\n");
		return -ENOTSUP;
	}
	/* ensure we could initialize the power library */
	if (rte_power_init(lcore))
		return -EINVAL;

	/* ensure we initialized the correct env */
	env = rte_power_get_env();
	if (env != PM_ENV_ACPI_CPUFREQ && env != PM_ENV_PSTATE_CPUFREQ) {
		RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes were initialized\n");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}
435
/*
 * Verify that MONITOR mode can be used for queue @qdata on this lcore:
 * the CPU must support the monitor intrinsic (and multi-monitor when
 * more than one queue would be managed), and the device must implement
 * rte_eth_get_monitor_addr().
 *
 * @return 0 on success, -ENOTSUP otherwise.
 */
static int
check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
{
	struct rte_power_monitor_cond dummy;
	bool multimonitor_supported;

	/* check if rte_power_monitor is supported */
	if (!global_data.intrinsics_support.power_monitor) {
		RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
		return -ENOTSUP;
	}
	/* check if multi-monitor is supported */
	multimonitor_supported =
			global_data.intrinsics_support.power_monitor_multi;

	/* if this would be the second queue, multi-monitor is required */
	if (cfg->n_queues > 0 && !multimonitor_supported) {
		RTE_LOG(DEBUG, POWER, "Monitoring multiple queues is not supported\n");
		return -ENOTSUP;
	}

	/* check if the device supports the necessary PMD API */
	if (rte_eth_get_monitor_addr(qdata->portid, qdata->qid,
			&dummy) == -ENOTSUP) {
		RTE_LOG(DEBUG, POWER, "The device does not support rte_eth_get_monitor_addr\n");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}
467
468static inline rte_rx_callback_fn
469get_monitor_callback(void)
470{
471 return global_data.intrinsics_support.power_monitor_multi ?
472 clb_multiwait : clb_umwait;
473}
474
475int
476rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
477 uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
478{
479 const union queue qdata = {.portid = port_id, .qid = queue_id};
480 struct pmd_core_cfg *lcore_cfg;
481 struct queue_list_entry *queue_cfg;
482 struct rte_eth_dev_info info;
483 rte_rx_callback_fn clb;
484 int ret;
485
486 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
487
488 if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
489 ret = -EINVAL;
490 goto end;
491 }
492
493 if (rte_eth_dev_info_get(port_id, &info) < 0) {
494 ret = -EINVAL;
495 goto end;
496 }
497
498
499 if (queue_id >= info.nb_rx_queues) {
500 ret = -EINVAL;
501 goto end;
502 }
503
504
505 ret = queue_stopped(port_id, queue_id);
506 if (ret != 1) {
507
508 ret = ret < 0 ? -EINVAL : -EBUSY;
509 goto end;
510 }
511
512 lcore_cfg = &lcore_cfgs[lcore_id];
513
514
515 ret = cfg_queues_stopped(lcore_cfg);
516 if (ret != 1) {
517
518 ret = ret < 0 ? -EINVAL : -EBUSY;
519 goto end;
520 }
521
522
523 if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
524 lcore_cfg->cb_mode != mode) {
525 ret = -EINVAL;
526 goto end;
527 }
528
529
530 rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);
531
532 switch (mode) {
533 case RTE_POWER_MGMT_TYPE_MONITOR:
534
535 ret = check_monitor(lcore_cfg, &qdata);
536 if (ret < 0)
537 goto end;
538
539 clb = get_monitor_callback();
540 break;
541 case RTE_POWER_MGMT_TYPE_SCALE:
542 clb = clb_scale_freq;
543
544
545 if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED)
546 break;
547
548 ret = check_scale(lcore_id);
549 if (ret < 0)
550 goto end;
551 break;
552 case RTE_POWER_MGMT_TYPE_PAUSE:
553
554 if (global_data.tsc_per_us == 0)
555 calc_tsc();
556
557 clb = clb_pause;
558 break;
559 default:
560 RTE_LOG(DEBUG, POWER, "Invalid power management type\n");
561 ret = -EINVAL;
562 goto end;
563 }
564
565 ret = queue_list_add(lcore_cfg, &qdata);
566 if (ret < 0) {
567 RTE_LOG(DEBUG, POWER, "Failed to add queue to list: %s\n",
568 strerror(-ret));
569 goto end;
570 }
571
572 queue_cfg = TAILQ_LAST(&lcore_cfg->head, queue_list_head);
573
574
575 if (lcore_cfg->n_queues == 1 && lcore_cfg->sleep_target == 0)
576 lcore_cfg->sleep_target = 1;
577
578
579 if (lcore_cfg->n_queues == 1) {
580 lcore_cfg->cb_mode = mode;
581 lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
582 }
583 queue_cfg->cb = rte_eth_add_rx_callback(port_id, queue_id,
584 clb, queue_cfg);
585
586 ret = 0;
587end:
588 return ret;
589}
590
/*
 * Disable power management on the given Rx queue/lcore pair. The queue
 * (and all other queues managed on this lcore) must be stopped first.
 *
 * @return 0 on success, negative errno-style error otherwise.
 */
int
rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
		uint16_t port_id, uint16_t queue_id)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	/* no need to check queue id as wrong queue id would not be enabled */
	lcore_cfg = &lcore_cfgs[lcore_id];

	/* check if other queues on this lcore are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
		return -EINVAL;

	/*
	 * Remove the queue from tracking first; the stopped-queue checks
	 * above are what guard against a data-plane thread racing with
	 * this removal.
	 */
	queue_cfg = queue_list_take(lcore_cfg, &qdata);
	if (queue_cfg == NULL)
		return -ENOENT;

	/* if this was the last queue, disable power management on the lcore */
	if (lcore_cfg->n_queues == 0)
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;

	switch (lcore_cfg->cb_mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR: /* fall-through */
	case RTE_POWER_MGMT_TYPE_PAUSE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		/* disable power library on this lcore if this was last queue */
		if (lcore_cfg->pwr_mgmt_state == PMD_MGMT_DISABLED) {
			rte_power_freq_max(lcore_id);
			rte_power_exit(lcore_id);
		}
		break;
	}

	/*
	 * NOTE(review): freeing the callback handle right after removal
	 * assumes no lcore is still executing inside it - the stopped-queue
	 * checks above are relied on to guarantee that; confirm callers
	 * always stop queues before disabling.
	 */
	rte_free((void *)queue_cfg->cb);
	free(queue_cfg);

	return 0;
}
663
664RTE_INIT(rte_power_ethdev_pmgmt_init) {
665 size_t i;
666
667
668 for (i = 0; i < RTE_DIM(lcore_cfgs); i++) {
669 struct pmd_core_cfg *cfg = &lcore_cfgs[i];
670 TAILQ_INIT(&cfg->head);
671 }
672}
673