// SPDX-License-Identifier: GPL-2.0
/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/sched/clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/oom.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);

/*
 * Print the layout of the compressed event header. This is exported
 * to user space so trace parsers know how events are encoded.
 */
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len : 5 bits\n");
	trace_seq_puts(s, "\ttime_delta : 27 bits\n");
	trace_seq_puts(s, "\tarray : 32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
			 RINGBUF_TYPE_TIME_STAMP);
	trace_seq_printf(s, "\tdata max type_len == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}

/*
 * The ring buffer is made up of a list of pages; a separate list is
 * allocated for each CPU. A writer may only write to the buffer that
 * is associated with the CPU it is executing on, while a reader may
 * read from any per-CPU buffer.
 *
 * The reader is special: each per-CPU buffer has a dedicated reader
 * page. Once the reader has consumed it, the reader page is swapped
 * with a page inside the ring buffer, so the writer never has to
 * stall on the reader and the reader never sees a page the writer
 * is still filling.
 */

/* Used for individual buffers (after the counter) */
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32 bit words */
#define RB_ALIGN_DATA		__aligned(RB_ALIGNMENT)

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP  = 8,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

#define extended_time(event) \
	(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	/* padding has a NULL time_delta */
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}

/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			/* undefined */
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		WARN_ON_ONCE(1);
	}
	/* not hit */
	return 0;
}

/*
 * Return total length of time extend and data,
 * or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (extended_time(event)) {
		/* time extends include the data event after it */
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (extended_time(event))
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

/* inline for ring buffer fast paths */
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
	if (extended_time(event))
		event = skip_time_extend(event);
	WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in type_len, then array[0] has the data */
	if (event->type_len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}

/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

/**
 * ring_buffer_event_time_stamp - return the event's extended timestamp
 * @event: the event to get the timestamp of
 *
 * Returns the extended timestamp associated with a data event.
 * An extended time_stamp is a 64 bit timestamp represented
 * internally in a special way that makes the best use of space
 * contained within a ring buffer event. This function decodes
 * it and maps it to a straight u64 value.
 */
u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
{
	u64 ts;

	ts = event->array[0];
	ts <<= TS_SHIFT;
	ts += event->time_delta;

	return ts;
}

/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED	(1 << 30)

struct buffer_data_page {
	u64		time_stamp;	/* page time stamp */
	local_t		commit;		/* write committed index */
	unsigned char	data[] RB_ALIGN_DATA;	/* data of buffer page */
};

/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};

/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update counter is incremented. This will allow the updater
 * to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}

/*
 * Free the data page and its buffer_page descriptor.
 */
static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;

	trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			 "offset:0;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)sizeof(field.time_stamp),
			 (unsigned int)is_signed_type(u64));

	trace_seq_printf(s, "\tfield: local_t commit;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 (unsigned int)sizeof(field.commit),
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: int overwrite;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 1,
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: char data;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), data),
			 (unsigned int)BUF_PAGE_SIZE,
			 (unsigned int)is_signed_type(char));

	return !trace_seq_has_overflowed(s);
}
struct rb_irq_work {
	struct irq_work		work;
	wait_queue_head_t	waiters;
	wait_queue_head_t	full_waiters;
	bool			waiters_pending;
	bool			full_waiters_pending;
	bool			wakeup_full;
};

/*
 * Structure to hold event state and handle nested events.
 */
struct rb_event_info {
	u64			ts;
	u64			delta;
	unsigned long		length;
	struct buffer_page	*tail_page;
	int			add_timestamp;
};

/*
 * Used for which event context the event is in.
 *  NMI     = 0
 *  IRQ     = 1
 *  SOFTIRQ = 2
 *  NORMAL  = 3
 *
 * See trace_recursive_lock() comment below for more details.
 */
enum {
	RB_CTX_NMI,
	RB_CTX_IRQ,
	RB_CTX_SOFTIRQ,
	RB_CTX_NORMAL,
	RB_CTX_MAX
};

/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	struct trace_buffer		*buffer;
	raw_spinlock_t			reader_lock;	/* serialize readers */
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	struct buffer_data_page		*free_page;
	unsigned long			nr_pages;
	unsigned int			current_context;
	struct list_head		*pages;
	struct buffer_page		*head_page;	/* read from head */
	struct buffer_page		*tail_page;	/* write to tail */
	struct buffer_page		*commit_page;	/* committed pages */
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	unsigned long			nest;
	local_t				entries_bytes;
	local_t				entries;
	local_t				overrun;
	local_t				commit_overrun;
	local_t				dropped_events;
	local_t				committing;
	local_t				commits;
	local_t				pages_touched;
	local_t				pages_read;
	long				last_pages_touch;
	size_t				shortest_full;
	unsigned long			read;
	unsigned long			read_bytes;
	u64				write_stamp;
	u64				read_stamp;
	/* ring buffer pages to update, > 0 to add, < 0 to remove */
	long				nr_pages_to_update;
	struct list_head		new_pages; /* new pages to add */
	struct work_struct		update_pages_work;
	struct completion		update_done;

	struct rb_irq_work		irq_work;
};

struct trace_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

	struct hlist_node		node;
	u64				(*clock)(void);

	struct rb_irq_work		irq_work;
	bool				time_stamp_abs;
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	unsigned long			next_event;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
	u64				page_stamp;
	struct ring_buffer_event	*event;
	int				missed_events;
};

/**
 * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages used by a per_cpu buffer of the ring buffer.
 */
size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
{
	return buffer->buffers[cpu]->nr_pages;
}

/**
 * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages that have content in the ring buffer.
 */
size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
{
	size_t read;
	size_t cnt;

	read = local_read(&buffer->buffers[cpu]->pages_read);
	cnt = local_read(&buffer->buffers[cpu]->pages_touched);
	/* The reader can read an empty page, but not more than that */
	if (cnt < read) {
		WARN_ON_ONCE(read > cnt + 1);
		return 0;
	}

	return cnt - read;
}

/*
 * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
 *
 * Schedules a delayed work to wake up any task that is blocked on the
 * ring buffer waiters queue.
 */
static void rb_wake_up_waiters(struct irq_work *work)
{
	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);

	wake_up_all(&rbwork->waiters);
	if (rbwork->wakeup_full) {
		rbwork->wakeup_full = false;
		wake_up_all(&rbwork->full_waiters);
	}
}

/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
{
	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
	DEFINE_WAIT(wait);
	struct rb_irq_work *work;
	int ret = 0;

	/*
	 * Depending on what the caller is waiting for, either any
	 * data in any cpu buffer, or a specific buffer, put the
	 * caller on the appropriate wait queue.
	 */
	if (cpu == RING_BUFFER_ALL_CPUS) {
		work = &buffer->irq_work;
		/* Full only makes sense on per cpu reads */
		full = 0;
	} else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -ENODEV;
		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	while (true) {
		if (full)
			prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
		else
			prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);

		/*
		 * The waiters_pending flag is set here, before checking the
		 * buffer, so that a writer that adds data after the check
		 * below is guaranteed to see the flag and queue the irq_work
		 * wakeup. A race can only cause a spurious wakeup, never a
		 * missed one.
		 */
		if (full)
			work->full_waiters_pending = true;
		else
			work->waiters_pending = true;

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
			break;

		if (cpu != RING_BUFFER_ALL_CPUS &&
		    !ring_buffer_empty_cpu(buffer, cpu)) {
			unsigned long flags;
			bool pagebusy;
			size_t nr_pages;
			size_t dirty;

			if (!full)
				break;

			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
			nr_pages = cpu_buffer->nr_pages;
			dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
			if (!cpu_buffer->shortest_full ||
			    cpu_buffer->shortest_full < full)
				cpu_buffer->shortest_full = full;
			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
			if (!pagebusy &&
			    (!nr_pages || (dirty * 100) > full * nr_pages))
				break;
		}

		schedule();
	}

	if (full)
		finish_wait(&work->full_waiters, &wait);
	else
		finish_wait(&work->waiters, &wait);

	return ret;
}

/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
			       struct file *filp, poll_table *poll_table)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct rb_irq_work *work;

	if (cpu == RING_BUFFER_ALL_CPUS)
		work = &buffer->irq_work;
	else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -EINVAL;

		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	poll_wait(filp, &work->waiters, poll_table);
	work->waiters_pending = true;
	/*
	 * Make sure waiters_pending is visible before checking whether
	 * the ring buffer is empty, so a writer that adds data after
	 * the check also sees the flag and issues the irq_work wakeup.
	 */
	smp_mb();

	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
		return EPOLLIN | EPOLLRDNORM;
	return 0;
}

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})

/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct trace_buffer *buffer)
{
	/* shift to debug/test normalization and TIME_EXTENTS */
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer,
				      int cpu, u64 *ts)
{
	/* Just stupid testing the normalize function and deltas */
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);

/*
 * Making the ring buffer lockless -- the head page scheme:
 *
 * The writer claims the head page by tagging the ->next pointer of
 * the page *before* the head page with flag bits. Because buffer
 * pages are cache-line aligned, the two least significant bits of
 * those list pointers are always zero and can be used as flags:
 *
 *   HEAD   - the page the reader will swap its reader page with
 *   UPDATE - the writer is in the middle of moving the head page
 *            (overwrite mode only)
 *
 * The reader and the writer only race on cmpxchg operations on that
 * single tagged pointer, so no lock is needed between a writer that
 * pushes the head page forward and a reader that swaps it out.
 */

#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL

#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED		4UL

/*
 * rb_list_head - remove any bit
 */
static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}
861
862
863
864
865
866
867
868
869
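/*
 * rb_is_head_page - test if the given page is currently the head page.
 *
 * Returns the flag bits (RB_PAGE_HEAD/UPDATE/NORMAL) stored in the
 * previous page's ->next pointer, or RB_PAGE_MOVED if that pointer no
 * longer points back at this page.
 */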
870static inline int
871rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
872 struct buffer_page *page, struct list_head *list)
873{
874 unsigned long val;
875
876 val = (unsigned long)list->next;
877
878 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
879 return RB_PAGE_MOVED;
880
881 return val & RB_FLAG_MASK;
882}
883
884
885
886
887
888
889
890
891static bool rb_is_reader_page(struct buffer_page *page)
892{
893 struct list_head *list = page->list.prev;
894
895 return rb_list_head(list->next) != &page->list;
896}
897
898
899
900
901static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
902 struct list_head *list)
903{
904 unsigned long *ptr;
905
906 ptr = (unsigned long *)&list->next;
907 *ptr |= RB_PAGE_HEAD;
908 *ptr &= ~RB_PAGE_UPDATE;
909}
910
911
912
913
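/*
 * rb_head_page_activate - set the HEAD flag on the pointer that leads
 * to the current head page, arming the lockless head-page protocol.
 */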
914static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
915{
916 struct buffer_page *head;
917
918 head = cpu_buffer->head_page;
919 if (!head)
920 return;
921
922
923
924
925 rb_set_list_to_head(cpu_buffer, head->list.prev);
926}
927
928static void rb_list_head_clear(struct list_head *list)
929{
930 unsigned long *ptr = (unsigned long *)&list->next;
931
932 *ptr &= ~RB_FLAG_MASK;
933}
934
935
936
937
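/*
 * rb_head_page_deactivate - clear the flag bits from every page list
 * pointer, so the list can be walked or torn down as a plain list.
 */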
938static void
939rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
940{
941 struct list_head *hd;
942
943
944 rb_list_head_clear(cpu_buffer->pages);
945
946 list_for_each(hd, cpu_buffer->pages)
947 rb_list_head_clear(hd);
948}
949
950static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
951 struct buffer_page *head,
952 struct buffer_page *prev,
953 int old_flag, int new_flag)
954{
955 struct list_head *list;
956 unsigned long val = (unsigned long)&head->list;
957 unsigned long ret;
958
959 list = &prev->list;
960
961 val &= ~RB_FLAG_MASK;
962
963 ret = cmpxchg((unsigned long *)&list->next,
964 val | old_flag, val | new_flag);
965
966
967 if ((ret & ~RB_FLAG_MASK) != val)
968 return RB_PAGE_MOVED;
969
970 return ret & RB_FLAG_MASK;
971}
972
973static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
974 struct buffer_page *head,
975 struct buffer_page *prev,
976 int old_flag)
977{
978 return rb_head_page_set(cpu_buffer, head, prev,
979 old_flag, RB_PAGE_UPDATE);
980}
981
982static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
983 struct buffer_page *head,
984 struct buffer_page *prev,
985 int old_flag)
986{
987 return rb_head_page_set(cpu_buffer, head, prev,
988 old_flag, RB_PAGE_HEAD);
989}
990
991static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
992 struct buffer_page *head,
993 struct buffer_page *prev,
994 int old_flag)
995{
996 return rb_head_page_set(cpu_buffer, head, prev,
997 old_flag, RB_PAGE_NORMAL);
998}
999
1000static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
1001 struct buffer_page **bpage)
1002{
1003 struct list_head *p = rb_list_head((*bpage)->list.next);
1004
1005 *bpage = list_entry(p, struct buffer_page, list);
1006}
1007
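/*
 * rb_set_head_page - find the page whose predecessor carries the HEAD
 * flag and cache it in cpu_buffer->head_page. Scans the list up to
 * three full passes (the writer may be moving the head concurrently)
 * and warns if no head page is found.
 */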
1008static struct buffer_page *
1009rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
1010{
1011 struct buffer_page *head;
1012 struct buffer_page *page;
1013 struct list_head *list;
1014 int i;
1015
1016 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
1017 return NULL;
1018
1019
1020 list = cpu_buffer->pages;
1021 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
1022 return NULL;
1023
1024 page = head = cpu_buffer->head_page;
1025
1026
1027
1028
1029
1030
1031 for (i = 0; i < 3; i++) {
1032 do {
1033 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
1034 cpu_buffer->head_page = page;
1035 return page;
1036 }
1037 rb_inc_page(cpu_buffer, &page);
1038 } while (page != head);
1039 }
1040
1041 RB_WARN_ON(cpu_buffer, 1);
1042
1043 return NULL;
1044}
1045
1046static int rb_head_page_replace(struct buffer_page *old,
1047 struct buffer_page *new)
1048{
1049 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
1050 unsigned long val;
1051 unsigned long ret;
1052
1053 val = *ptr & ~RB_FLAG_MASK;
1054 val |= RB_PAGE_HEAD;
1055
1056 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
1057
1058 return ret == val;
1059}
1060
1061
1062
1063
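/*
 * rb_tail_page_update - move the tail page forward.
 *
 * The write and entries counters of the next page are bumped by
 * RB_WRITE_INTCNT so that interrupting writers can be detected, then
 * zeroed (with cmpxchg) only if this writer is still on the real tail
 * page, and finally cpu_buffer->tail_page is swung to the next page.
 */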
1064static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
1065 struct buffer_page *tail_page,
1066 struct buffer_page *next_page)
1067{
1068 unsigned long old_entries;
1069 unsigned long old_write;
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
1081 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
1082
1083 local_inc(&cpu_buffer->pages_touched);
1084
1085
1086
1087
1088 barrier();
1089
1090
1091
1092
1093
1094
1095 if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
1096
1097 unsigned long val = old_write & ~RB_WRITE_MASK;
1098 unsigned long eval = old_entries & ~RB_WRITE_MASK;
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110 (void)local_cmpxchg(&next_page->write, old_write, val);
1111 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
1112
1113
1114
1115
1116
1117
1118 local_set(&next_page->page->commit, 0);
1119
1120
1121 (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
1122 }
1123}
1124
1125static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
1126 struct buffer_page *bpage)
1127{
1128 unsigned long val = (unsigned long)bpage;
1129
1130 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
1131 return 1;
1132
1133 return 0;
1134}
1135
1136
1137
1138
1139static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1140 struct list_head *list)
1141{
1142 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
1143 return 1;
1144 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
1145 return 1;
1146 return 0;
1147}
1148
1149
1150
1151
1152
1153
1154
1155
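/*
 * rb_check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure, verify that the prev/next links of all buffer
 * pages are consistent. The head-page flags are cleared for the check
 * and re-armed afterwards.
 */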
1156static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
1157{
1158 struct list_head *head = cpu_buffer->pages;
1159 struct buffer_page *bpage, *tmp;
1160
1161
1162 if (cpu_buffer->head_page)
1163 rb_set_head_page(cpu_buffer);
1164
1165 rb_head_page_deactivate(cpu_buffer);
1166
1167 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
1168 return -1;
1169 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
1170 return -1;
1171
1172 if (rb_check_list(cpu_buffer, head))
1173 return -1;
1174
1175 list_for_each_entry_safe(bpage, tmp, head, list) {
1176 if (RB_WARN_ON(cpu_buffer,
1177 bpage->list.next->prev != &bpage->list))
1178 return -1;
1179 if (RB_WARN_ON(cpu_buffer,
1180 bpage->list.prev->next != &bpage->list))
1181 return -1;
1182 if (rb_check_list(cpu_buffer, &bpage->list))
1183 return -1;
1184 }
1185
1186 rb_head_page_activate(cpu_buffer);
1187
1188 return 0;
1189}
1190
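/*
 * Allocate nr_pages buffer pages onto the given list for @cpu.
 * si_mem_available() is checked first, the allocations use
 * GFP_KERNEL | __GFP_RETRY_MAYFAIL, and a user task doing the resize
 * is marked as the preferred OOM victim while allocating, so a huge
 * request cannot take the whole machine down.
 */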
1191static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
1192{
1193 struct buffer_page *bpage, *tmp;
1194 bool user_thread = current->mm != NULL;
1195 gfp_t mflags;
1196 long i;
1197
1198
1199
1200
1201
1202
1203
1204
1205 i = si_mem_available();
1206 if (i < nr_pages)
1207 return -ENOMEM;
1208
1209
1210
1211
1212
1213
1214 mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225 if (user_thread)
1226 set_current_oom_origin();
1227 for (i = 0; i < nr_pages; i++) {
1228 struct page *page;
1229
1230 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1231 mflags, cpu_to_node(cpu));
1232 if (!bpage)
1233 goto free_pages;
1234
1235 list_add(&bpage->list, pages);
1236
1237 page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
1238 if (!page)
1239 goto free_pages;
1240 bpage->page = page_address(page);
1241 rb_init_page(bpage->page);
1242
1243 if (user_thread && fatal_signal_pending(current))
1244 goto free_pages;
1245 }
1246 if (user_thread)
1247 clear_current_oom_origin();
1248
1249 return 0;
1250
1251free_pages:
1252 list_for_each_entry_safe(bpage, tmp, pages, list) {
1253 list_del_init(&bpage->list);
1254 free_buffer_page(bpage);
1255 }
1256 if (user_thread)
1257 clear_current_oom_origin();
1258
1259 return -ENOMEM;
1260}
1261
1262static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1263 unsigned long nr_pages)
1264{
1265 LIST_HEAD(pages);
1266
1267 WARN_ON(!nr_pages);
1268
1269 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1270 return -ENOMEM;
1271
1272
1273
1274
1275
1276
1277 cpu_buffer->pages = pages.next;
1278 list_del(&pages);
1279
1280 cpu_buffer->nr_pages = nr_pages;
1281
1282 rb_check_pages(cpu_buffer);
1283
1284 return 0;
1285}
1286
1287static struct ring_buffer_per_cpu *
1288rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
1289{
1290 struct ring_buffer_per_cpu *cpu_buffer;
1291 struct buffer_page *bpage;
1292 struct page *page;
1293 int ret;
1294
1295 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1296 GFP_KERNEL, cpu_to_node(cpu));
1297 if (!cpu_buffer)
1298 return NULL;
1299
1300 cpu_buffer->cpu = cpu;
1301 cpu_buffer->buffer = buffer;
1302 raw_spin_lock_init(&cpu_buffer->reader_lock);
1303 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1304 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1305 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1306 init_completion(&cpu_buffer->update_done);
1307 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1308 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1309 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1310
1311 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1312 GFP_KERNEL, cpu_to_node(cpu));
1313 if (!bpage)
1314 goto fail_free_buffer;
1315
1316 rb_check_bpage(cpu_buffer, bpage);
1317
1318 cpu_buffer->reader_page = bpage;
1319 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1320 if (!page)
1321 goto fail_free_reader;
1322 bpage->page = page_address(page);
1323 rb_init_page(bpage->page);
1324
1325 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1326 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1327
1328 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1329 if (ret < 0)
1330 goto fail_free_reader;
1331
1332 cpu_buffer->head_page
1333 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1334 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1335
1336 rb_head_page_activate(cpu_buffer);
1337
1338 return cpu_buffer;
1339
1340 fail_free_reader:
1341 free_buffer_page(cpu_buffer->reader_page);
1342
1343 fail_free_buffer:
1344 kfree(cpu_buffer);
1345 return NULL;
1346}
1347
1348static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1349{
1350 struct list_head *head = cpu_buffer->pages;
1351 struct buffer_page *bpage, *tmp;
1352
1353 free_buffer_page(cpu_buffer->reader_page);
1354
1355 rb_head_page_deactivate(cpu_buffer);
1356
1357 if (head) {
1358 list_for_each_entry_safe(bpage, tmp, head, list) {
1359 list_del_init(&bpage->list);
1360 free_buffer_page(bpage);
1361 }
1362 bpage = list_entry(head, struct buffer_page, list);
1363 free_buffer_page(bpage);
1364 }
1365
1366 kfree(cpu_buffer);
1367}
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
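/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 * @key: ring buffer reader_lock_key.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */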
1380struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1381 struct lock_class_key *key)
1382{
1383 struct trace_buffer *buffer;
1384 long nr_pages;
1385 int bsize;
1386 int cpu;
1387 int ret;
1388
1389
1390 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1391 GFP_KERNEL);
1392 if (!buffer)
1393 return NULL;
1394
1395 if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1396 goto fail_free_buffer;
1397
1398 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1399 buffer->flags = flags;
1400 buffer->clock = trace_clock_local;
1401 buffer->reader_lock_key = key;
1402
1403 init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1404 init_waitqueue_head(&buffer->irq_work.waiters);
1405
1406
1407 if (nr_pages < 2)
1408 nr_pages = 2;
1409
1410 buffer->cpus = nr_cpu_ids;
1411
1412 bsize = sizeof(void *) * nr_cpu_ids;
1413 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1414 GFP_KERNEL);
1415 if (!buffer->buffers)
1416 goto fail_free_cpumask;
1417
1418 cpu = raw_smp_processor_id();
1419 cpumask_set_cpu(cpu, buffer->cpumask);
1420 buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1421 if (!buffer->buffers[cpu])
1422 goto fail_free_buffers;
1423
1424 ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1425 if (ret < 0)
1426 goto fail_free_buffers;
1427
1428 mutex_init(&buffer->mutex);
1429
1430 return buffer;
1431
1432 fail_free_buffers:
1433 for_each_buffer_cpu(buffer, cpu) {
1434 if (buffer->buffers[cpu])
1435 rb_free_cpu_buffer(buffer->buffers[cpu]);
1436 }
1437 kfree(buffer->buffers);
1438
1439 fail_free_cpumask:
1440 free_cpumask_var(buffer->cpumask);
1441
1442 fail_free_buffer:
1443 kfree(buffer);
1444 return NULL;
1445}
1446EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
1447
1448
1449
1450
1451
1452void
1453ring_buffer_free(struct trace_buffer *buffer)
1454{
1455 int cpu;
1456
1457 cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1458
1459 for_each_buffer_cpu(buffer, cpu)
1460 rb_free_cpu_buffer(buffer->buffers[cpu]);
1461
1462 kfree(buffer->buffers);
1463 free_cpumask_var(buffer->cpumask);
1464
1465 kfree(buffer);
1466}
1467EXPORT_SYMBOL_GPL(ring_buffer_free);
1468
1469void ring_buffer_set_clock(struct trace_buffer *buffer,
1470 u64 (*clock)(void))
1471{
1472 buffer->clock = clock;
1473}
1474
1475void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs)
1476{
1477 buffer->time_stamp_abs = abs;
1478}
1479
1480bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer)
1481{
1482 return buffer->time_stamp_abs;
1483}
1484
1485static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1486
1487static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1488{
1489 return local_read(&bpage->entries) & RB_WRITE_MASK;
1490}
1491
1492static inline unsigned long rb_page_write(struct buffer_page *bpage)
1493{
1494 return local_read(&bpage->write) & RB_WRITE_MASK;
1495}
1496
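/*
 * Remove @nr_pages from the ring buffer, starting after the tail page
 * (skipping the reader page), relink the remaining list, and account
 * any entries on the removed pages as overruns before freeing them.
 */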
1497static int
1498rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
1499{
1500 struct list_head *tail_page, *to_remove, *next_page;
1501 struct buffer_page *to_remove_page, *tmp_iter_page;
1502 struct buffer_page *last_page, *first_page;
1503 unsigned long nr_removed;
1504 unsigned long head_bit;
1505 int page_entries;
1506
1507 head_bit = 0;
1508
1509 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1510 atomic_inc(&cpu_buffer->record_disabled);
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520 tail_page = &cpu_buffer->tail_page->list;
1521
1522
1523
1524
1525
1526 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1527 tail_page = rb_list_head(tail_page->next);
1528 to_remove = tail_page;
1529
1530
1531 first_page = list_entry(rb_list_head(to_remove->next),
1532 struct buffer_page, list);
1533
1534 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1535 to_remove = rb_list_head(to_remove)->next;
1536 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1537 }
1538
1539 next_page = rb_list_head(to_remove)->next;
1540
1541
1542
1543
1544
1545
1546 tail_page->next = (struct list_head *)((unsigned long)next_page |
1547 head_bit);
1548 next_page = rb_list_head(next_page);
1549 next_page->prev = tail_page;
1550
1551
1552 cpu_buffer->pages = next_page;
1553
1554
1555 if (head_bit)
1556 cpu_buffer->head_page = list_entry(next_page,
1557 struct buffer_page, list);
1558
1559
1560
1561
1562
1563 cpu_buffer->read = 0;
1564
1565
1566 atomic_dec(&cpu_buffer->record_disabled);
1567 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1568
1569 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1570
1571
1572 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1573 list);
1574 tmp_iter_page = first_page;
1575
1576 do {
1577 cond_resched();
1578
1579 to_remove_page = tmp_iter_page;
1580 rb_inc_page(cpu_buffer, &tmp_iter_page);
1581
1582
1583 page_entries = rb_page_entries(to_remove_page);
1584 if (page_entries) {
1585
1586
1587
1588
1589
1590
1591 local_add(page_entries, &cpu_buffer->overrun);
1592 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1593 }
1594
1595
1596
1597
1598
1599 free_buffer_page(to_remove_page);
1600 nr_removed--;
1601
1602 } while (to_remove_page != last_page);
1603
1604 RB_WARN_ON(cpu_buffer, nr_removed);
1605
1606 return nr_removed == 0;
1607}
1608
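/*
 * Splice cpu_buffer->new_pages into the ring buffer just before the
 * head page, using a cmpxchg on the previous page's ->next pointer.
 * Retried up to ten times because the writer may move the head page
 * between finding it and performing the exchange.
 */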
1609static int
1610rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1611{
1612 struct list_head *pages = &cpu_buffer->new_pages;
1613 int retries, success;
1614
1615 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630 retries = 10;
1631 success = 0;
1632 while (retries--) {
1633 struct list_head *head_page, *prev_page, *r;
1634 struct list_head *last_page, *first_page;
1635 struct list_head *head_page_with_bit;
1636
1637 head_page = &rb_set_head_page(cpu_buffer)->list;
1638 if (!head_page)
1639 break;
1640 prev_page = head_page->prev;
1641
1642 first_page = pages->next;
1643 last_page = pages->prev;
1644
1645 head_page_with_bit = (struct list_head *)
1646 ((unsigned long)head_page | RB_PAGE_HEAD);
1647
1648 last_page->next = head_page_with_bit;
1649 first_page->prev = prev_page;
1650
1651 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1652
1653 if (r == head_page_with_bit) {
1654
1655
1656
1657
1658
1659 head_page->prev = last_page;
1660 success = 1;
1661 break;
1662 }
1663 }
1664
1665 if (success)
1666 INIT_LIST_HEAD(pages);
1667
1668
1669
1670
1671 RB_WARN_ON(cpu_buffer, !success);
1672 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1673
1674
1675 if (!success) {
1676 struct buffer_page *bpage, *tmp;
1677 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1678 list) {
1679 list_del_init(&bpage->list);
1680 free_buffer_page(bpage);
1681 }
1682 }
1683 return success;
1684}
1685
1686static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1687{
1688 int success;
1689
1690 if (cpu_buffer->nr_pages_to_update > 0)
1691 success = rb_insert_pages(cpu_buffer);
1692 else
1693 success = rb_remove_pages(cpu_buffer,
1694 -cpu_buffer->nr_pages_to_update);
1695
1696 if (success)
1697 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1698}
1699
1700static void update_pages_handler(struct work_struct *work)
1701{
1702 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1703 struct ring_buffer_per_cpu, update_pages_work);
1704 rb_update_pages(cpu_buffer);
1705 complete(&cpu_buffer->update_done);
1706}
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
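/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns size on success and < 0 on failure.
 */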
1718int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
1719 int cpu_id)
1720{
1721 struct ring_buffer_per_cpu *cpu_buffer;
1722 unsigned long nr_pages;
1723 int cpu, err = 0;
1724
1725
1726
1727
1728 if (!buffer)
1729 return size;
1730
1731
1732 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1733 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1734 return size;
1735
1736 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1737
1738
1739 if (nr_pages < 2)
1740 nr_pages = 2;
1741
1742 size = nr_pages * BUF_PAGE_SIZE;
1743
1744
1745 mutex_lock(&buffer->mutex);
1746
1747
1748 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1749
1750
1751
1752
1753
1754 for_each_buffer_cpu(buffer, cpu) {
1755 cpu_buffer = buffer->buffers[cpu];
1756 if (atomic_read(&cpu_buffer->resize_disabled)) {
1757 err = -EBUSY;
1758 goto out_err_unlock;
1759 }
1760 }
1761
1762
1763 for_each_buffer_cpu(buffer, cpu) {
1764 cpu_buffer = buffer->buffers[cpu];
1765
1766 cpu_buffer->nr_pages_to_update = nr_pages -
1767 cpu_buffer->nr_pages;
1768
1769
1770
1771 if (cpu_buffer->nr_pages_to_update <= 0)
1772 continue;
1773
1774
1775
1776
1777 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1778 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1779 &cpu_buffer->new_pages, cpu)) {
1780
1781 err = -ENOMEM;
1782 goto out_err;
1783 }
1784 }
1785
1786 get_online_cpus();
1787
1788
1789
1790
1791
1792 for_each_buffer_cpu(buffer, cpu) {
1793 cpu_buffer = buffer->buffers[cpu];
1794 if (!cpu_buffer->nr_pages_to_update)
1795 continue;
1796
1797
1798 if (!cpu_online(cpu)) {
1799 rb_update_pages(cpu_buffer);
1800 cpu_buffer->nr_pages_to_update = 0;
1801 } else {
1802 schedule_work_on(cpu,
1803 &cpu_buffer->update_pages_work);
1804 }
1805 }
1806
1807
1808 for_each_buffer_cpu(buffer, cpu) {
1809 cpu_buffer = buffer->buffers[cpu];
1810 if (!cpu_buffer->nr_pages_to_update)
1811 continue;
1812
1813 if (cpu_online(cpu))
1814 wait_for_completion(&cpu_buffer->update_done);
1815 cpu_buffer->nr_pages_to_update = 0;
1816 }
1817
1818 put_online_cpus();
1819 } else {
1820
1821 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1822 goto out;
1823
1824 cpu_buffer = buffer->buffers[cpu_id];
1825
1826 if (nr_pages == cpu_buffer->nr_pages)
1827 goto out;
1828
1829
1830
1831
1832
1833
1834 if (atomic_read(&cpu_buffer->resize_disabled)) {
1835 err = -EBUSY;
1836 goto out_err_unlock;
1837 }
1838
1839 cpu_buffer->nr_pages_to_update = nr_pages -
1840 cpu_buffer->nr_pages;
1841
1842 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1843 if (cpu_buffer->nr_pages_to_update > 0 &&
1844 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1845 &cpu_buffer->new_pages, cpu_id)) {
1846 err = -ENOMEM;
1847 goto out_err;
1848 }
1849
1850 get_online_cpus();
1851
1852
1853 if (!cpu_online(cpu_id))
1854 rb_update_pages(cpu_buffer);
1855 else {
1856 schedule_work_on(cpu_id,
1857 &cpu_buffer->update_pages_work);
1858 wait_for_completion(&cpu_buffer->update_done);
1859 }
1860
1861 cpu_buffer->nr_pages_to_update = 0;
1862 put_online_cpus();
1863 }
1864
1865 out:
1866
1867
1868
1869
1870
1871
1872
1873 if (atomic_read(&buffer->record_disabled)) {
1874 atomic_inc(&buffer->record_disabled);
1875
1876
1877
1878
1879
1880
1881 synchronize_rcu();
1882 for_each_buffer_cpu(buffer, cpu) {
1883 cpu_buffer = buffer->buffers[cpu];
1884 rb_check_pages(cpu_buffer);
1885 }
1886 atomic_dec(&buffer->record_disabled);
1887 }
1888
1889 mutex_unlock(&buffer->mutex);
1890 return size;
1891
1892 out_err:
1893 for_each_buffer_cpu(buffer, cpu) {
1894 struct buffer_page *bpage, *tmp;
1895
1896 cpu_buffer = buffer->buffers[cpu];
1897 cpu_buffer->nr_pages_to_update = 0;
1898
1899 if (list_empty(&cpu_buffer->new_pages))
1900 continue;
1901
1902 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1903 list) {
1904 list_del_init(&bpage->list);
1905 free_buffer_page(bpage);
1906 }
1907 }
1908 out_err_unlock:
1909 mutex_unlock(&buffer->mutex);
1910 return err;
1911}
1912EXPORT_SYMBOL_GPL(ring_buffer_resize);
1913
1914void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val)
1915{
1916 mutex_lock(&buffer->mutex);
1917 if (val)
1918 buffer->flags |= RB_FL_OVERWRITE;
1919 else
1920 buffer->flags &= ~RB_FL_OVERWRITE;
1921 mutex_unlock(&buffer->mutex);
1922}
1923EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1924
1925static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1926{
1927 return bpage->page->data + index;
1928}
1929
1930static __always_inline struct ring_buffer_event *
1931rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1932{
1933 return __rb_page_index(cpu_buffer->reader_page,
1934 cpu_buffer->reader_page->read);
1935}
1936
1937static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
1938{
1939 return local_read(&bpage->page->commit);
1940}
1941
1942static struct ring_buffer_event *
1943rb_iter_head_event(struct ring_buffer_iter *iter)
1944{
1945 struct ring_buffer_event *event;
1946 struct buffer_page *iter_head_page = iter->head_page;
1947 unsigned long commit;
1948 unsigned length;
1949
1950 if (iter->head != iter->next_event)
1951 return iter->event;
1952
1953
1954
1955
1956
1957
1958 commit = rb_page_commit(iter_head_page);
1959 smp_rmb();
1960 event = __rb_page_index(iter_head_page, iter->head);
1961 length = rb_event_length(event);
1962
1963
1964
1965
1966
1967 barrier();
1968
1969 if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
1970
1971 goto reset;
1972
1973 memcpy(iter->event, event, length);
1974
1975
1976
1977
1978 smp_rmb();
1979
1980
1981 if (iter->page_stamp != iter_head_page->page->time_stamp ||
1982 commit > rb_page_commit(iter_head_page))
1983 goto reset;
1984
1985 iter->next_event = iter->head + length;
1986 return iter->event;
1987 reset:
1988
1989 iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
1990 iter->head = 0;
1991 iter->next_event = 0;
1992 iter->missed_events = 1;
1993 return NULL;
1994}
1995
1996
1997static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
1998{
1999 return rb_page_commit(bpage);
2000}
2001
2002static __always_inline unsigned
2003rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
2004{
2005 return rb_page_commit(cpu_buffer->commit_page);
2006}
2007
2008static __always_inline unsigned
2009rb_event_index(struct ring_buffer_event *event)
2010{
2011 unsigned long addr = (unsigned long)event;
2012
2013 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
2014}
2015
2016static void rb_inc_iter(struct ring_buffer_iter *iter)
2017{
2018 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2019
2020
2021
2022
2023
2024
2025
2026 if (iter->head_page == cpu_buffer->reader_page)
2027 iter->head_page = rb_set_head_page(cpu_buffer);
2028 else
2029 rb_inc_page(cpu_buffer, &iter->head_page);
2030
2031 iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
2032 iter->head = 0;
2033 iter->next_event = 0;
2034}
2035
2036
2037
2038
2039
2040
2041
2042
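/*
 * rb_handle_head_page - the writer hit the head page.
 *
 * Returns: +1 to retry the page
 *           0 on success
 *          -1 on error
 */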
2043static int
2044rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
2045 struct buffer_page *tail_page,
2046 struct buffer_page *next_page)
2047{
2048 struct buffer_page *new_head;
2049 int entries;
2050 int type;
2051 int ret;
2052
2053 entries = rb_page_entries(next_page);
2054
2055
2056
2057
2058
2059
2060 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
2061 RB_PAGE_HEAD);
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074 switch (type) {
2075 case RB_PAGE_HEAD:
2076
2077
2078
2079
2080
2081 local_add(entries, &cpu_buffer->overrun);
2082 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
2083
2084
2085
2086
2087
2088
2089
2090 break;
2091
2092 case RB_PAGE_UPDATE:
2093
2094
2095
2096
2097 break;
2098 case RB_PAGE_NORMAL:
2099
2100
2101
2102
2103
2104 return 1;
2105 case RB_PAGE_MOVED:
2106
2107
2108
2109
2110
2111 return 1;
2112 default:
2113 RB_WARN_ON(cpu_buffer, 1);
2114 return -1;
2115 }
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131 new_head = next_page;
2132 rb_inc_page(cpu_buffer, &new_head);
2133
2134 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
2135 RB_PAGE_NORMAL);
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145 switch (ret) {
2146 case RB_PAGE_HEAD:
2147 case RB_PAGE_NORMAL:
2148
2149 break;
2150 default:
2151 RB_WARN_ON(cpu_buffer, 1);
2152 return -1;
2153 }
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165 if (ret == RB_PAGE_NORMAL) {
2166 struct buffer_page *buffer_tail_page;
2167
2168 buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
2169
2170
2171
2172
2173 if (buffer_tail_page != tail_page &&
2174 buffer_tail_page != next_page)
2175 rb_head_page_set_normal(cpu_buffer, new_head,
2176 next_page,
2177 RB_PAGE_HEAD);
2178 }
2179
2180
2181
2182
2183
2184
2185 if (type == RB_PAGE_HEAD) {
2186 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2187 tail_page,
2188 RB_PAGE_UPDATE);
2189 if (RB_WARN_ON(cpu_buffer,
2190 ret != RB_PAGE_UPDATE))
2191 return -1;
2192 }
2193
2194 return 0;
2195}
2196
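/*
 * A reserved write ran past the end of the tail page: rewind the
 * write index and turn whatever room is left on the page into a
 * padding event so that readers skip straight to the next page.
 */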
2197static inline void
2198rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2199 unsigned long tail, struct rb_event_info *info)
2200{
2201 struct buffer_page *tail_page = info->tail_page;
2202 struct ring_buffer_event *event;
2203 unsigned long length = info->length;
2204
2205
2206
2207
2208
2209 if (tail >= BUF_PAGE_SIZE) {
2210
2211
2212
2213
2214
2215 if (tail == BUF_PAGE_SIZE)
2216 tail_page->real_end = 0;
2217
2218 local_sub(length, &tail_page->write);
2219 return;
2220 }
2221
2222 event = __rb_page_index(tail_page, tail);
2223
2224
2225 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2226
2227
2228
2229
2230
2231
2232 tail_page->real_end = tail;
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2246
2247
2248
2249 rb_event_set_padding(event);
2250
2251
2252 local_sub(length, &tail_page->write);
2253 return;
2254 }
2255
2256
2257 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2258 event->type_len = RINGBUF_TYPE_PADDING;
2259
2260 event->time_delta = 1;
2261
2262
2263 length = (tail + length) - BUF_PAGE_SIZE;
2264 local_sub(length, &tail_page->write);
2265}
2266
2267static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
2268
2269
2270
2271
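/*
 * This is the slow path, force gcc not to inline it. Handles the case
 * where the reserved event did not fit on the current tail page and
 * the tail must be moved to the next page.
 */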
2272static noinline struct ring_buffer_event *
2273rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2274 unsigned long tail, struct rb_event_info *info)
2275{
2276 struct buffer_page *tail_page = info->tail_page;
2277 struct buffer_page *commit_page = cpu_buffer->commit_page;
2278 struct trace_buffer *buffer = cpu_buffer->buffer;
2279 struct buffer_page *next_page;
2280 int ret;
2281
2282 next_page = tail_page;
2283
2284 rb_inc_page(cpu_buffer, &next_page);
2285
2286
2287
2288
2289
2290
2291 if (unlikely(next_page == commit_page)) {
2292 local_inc(&cpu_buffer->commit_overrun);
2293 goto out_reset;
2294 }
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2311
2312
2313
2314
2315
2316 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2317
2318
2319
2320
2321 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2322 local_inc(&cpu_buffer->dropped_events);
2323 goto out_reset;
2324 }
2325
2326 ret = rb_handle_head_page(cpu_buffer,
2327 tail_page,
2328 next_page);
2329 if (ret < 0)
2330 goto out_reset;
2331 if (ret)
2332 goto out_again;
2333 } else {
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344 if (unlikely((cpu_buffer->commit_page !=
2345 cpu_buffer->tail_page) &&
2346 (cpu_buffer->commit_page ==
2347 cpu_buffer->reader_page))) {
2348 local_inc(&cpu_buffer->commit_overrun);
2349 goto out_reset;
2350 }
2351 }
2352 }
2353
2354 rb_tail_page_update(cpu_buffer, tail_page, next_page);
2355
2356 out_again:
2357
2358 rb_reset_tail(cpu_buffer, tail, info);
2359
2360
2361 rb_end_commit(cpu_buffer);
2362
2363 local_inc(&cpu_buffer->committing);
2364
2365
2366 return ERR_PTR(-EAGAIN);
2367
2368 out_reset:
2369
2370 rb_reset_tail(cpu_buffer, tail, info);
2371
2372 return NULL;
2373}
2374
2375
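/*
 * Write a TIME_EXTEND (or absolute TIME_STAMP) event at @event: the
 * 59-bit delta is split between time_delta (low 27 bits) and array[0]
 * (high 32 bits). Returns the event slot that follows the extend.
 */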
2376static noinline struct ring_buffer_event *
2377rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
2378{
2379 if (abs)
2380 event->type_len = RINGBUF_TYPE_TIME_STAMP;
2381 else
2382 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2383
2384
2385 if (abs || rb_event_index(event)) {
2386 event->time_delta = delta & TS_MASK;
2387 event->array[0] = delta >> TS_SHIFT;
2388 } else {
2389
2390 event->time_delta = 0;
2391 event->array[0] = 0;
2392 }
2393
2394 return skip_time_extend(event);
2395}
2396
2397static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2398 struct ring_buffer_event *event);
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
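/**
 * rb_update_event - update event type and data
 * @cpu_buffer: The per cpu buffer of the @event
 * @event: the event to update
 * @info: The info to update the @event with (contains length and delta)
 *
 * Update the type and data fields of the @event. The length
 * is the actual size that is written to the ring buffer,
 * and with this, we can determine what to place into the
 * data field.
 */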
2411static void
2412rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2413 struct ring_buffer_event *event,
2414 struct rb_event_info *info)
2415{
2416 unsigned length = info->length;
2417 u64 delta = info->delta;
2418
2419
2420 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2421 delta = 0;
2422
2423
2424
2425
2426
2427 if (unlikely(info->add_timestamp)) {
2428 bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
2429
2430 event = rb_add_time_stamp(event, abs ? info->delta : delta, abs);
2431 length -= RB_LEN_TIME_EXTEND;
2432 delta = 0;
2433 }
2434
2435 event->time_delta = delta;
2436 length -= RB_EVNT_HDR_SIZE;
2437 if (length > RB_MAX_SMALL_DATA) {
2438 event->type_len = 0;
2439 event->array[0] = length;
2440 } else
2441 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2442}
2443
2444static unsigned rb_calculate_event_length(unsigned length)
2445{
2446 struct ring_buffer_event event;
2447
2448
2449 if (!length)
2450 length++;
2451
2452 if (length > RB_MAX_SMALL_DATA)
2453 length += sizeof(event.array[0]);
2454
2455 length += RB_EVNT_HDR_SIZE;
2456 length = ALIGN(length, RB_ALIGNMENT);
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2471 length += RB_ALIGNMENT;
2472
2473 return length;
2474}
2475
2476#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2477static inline bool sched_clock_stable(void)
2478{
2479 return true;
2480}
2481#endif
2482
2483static inline int
2484rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2485 struct ring_buffer_event *event)
2486{
2487 unsigned long new_index, old_index;
2488 struct buffer_page *bpage;
2489 unsigned long index;
2490 unsigned long addr;
2491
2492 new_index = rb_event_index(event);
2493 old_index = new_index + rb_event_ts_length(event);
2494 addr = (unsigned long)event;
2495 addr &= PAGE_MASK;
2496
2497 bpage = READ_ONCE(cpu_buffer->tail_page);
2498
2499 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2500 unsigned long write_mask =
2501 local_read(&bpage->write) & ~RB_WRITE_MASK;
2502 unsigned long event_length = rb_event_length(event);
2503
2504
2505
2506
2507
2508
2509 old_index += write_mask;
2510 new_index += write_mask;
2511 index = local_cmpxchg(&bpage->write, old_index, new_index);
2512 if (index == old_index) {
2513
2514 local_sub(event_length, &cpu_buffer->entries_bytes);
2515 return 1;
2516 }
2517 }
2518
2519
2520 return 0;
2521}
2522
2523static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2524{
2525 local_inc(&cpu_buffer->committing);
2526 local_inc(&cpu_buffer->commits);
2527}
2528
2529static __always_inline void
2530rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2531{
2532 unsigned long max_count;
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542 again:
2543 max_count = cpu_buffer->nr_pages * 100;
2544
2545 while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2546 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2547 return;
2548 if (RB_WARN_ON(cpu_buffer,
2549 rb_is_reader_page(cpu_buffer->tail_page)))
2550 return;
2551 local_set(&cpu_buffer->commit_page->page->commit,
2552 rb_page_write(cpu_buffer->commit_page));
2553 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
2554
2555 if (rb_page_write(cpu_buffer->commit_page))
2556 cpu_buffer->write_stamp =
2557 cpu_buffer->commit_page->page->time_stamp;
2558
2559 barrier();
2560 }
2561 while (rb_commit_index(cpu_buffer) !=
2562 rb_page_write(cpu_buffer->commit_page)) {
2563
2564 local_set(&cpu_buffer->commit_page->page->commit,
2565 rb_page_write(cpu_buffer->commit_page));
2566 RB_WARN_ON(cpu_buffer,
2567 local_read(&cpu_buffer->commit_page->page->commit) &
2568 ~RB_WRITE_MASK);
2569 barrier();
2570 }
2571
2572
2573 barrier();
2574
2575
2576
2577
2578
2579
2580 if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
2581 goto again;
2582}
2583
2584static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2585{
2586 unsigned long commits;
2587
2588 if (RB_WARN_ON(cpu_buffer,
2589 !local_read(&cpu_buffer->committing)))
2590 return;
2591
2592 again:
2593 commits = local_read(&cpu_buffer->commits);
2594
2595 barrier();
2596 if (local_read(&cpu_buffer->committing) == 1)
2597 rb_set_commit_to_write(cpu_buffer);
2598
2599 local_dec(&cpu_buffer->committing);
2600
2601
2602 barrier();
2603
2604
2605
2606
2607
2608
2609 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2610 !local_read(&cpu_buffer->committing)) {
2611 local_inc(&cpu_buffer->committing);
2612 goto again;
2613 }
2614}
2615
2616static inline void rb_event_discard(struct ring_buffer_event *event)
2617{
2618 if (extended_time(event))
2619 event = skip_time_extend(event);
2620
2621
2622 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2623 event->type_len = RINGBUF_TYPE_PADDING;
2624
2625 if (!event->time_delta)
2626 event->time_delta = 1;
2627}
2628
2629static __always_inline bool
2630rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2631 struct ring_buffer_event *event)
2632{
2633 unsigned long addr = (unsigned long)event;
2634 unsigned long index;
2635
2636 index = rb_event_index(event);
2637 addr &= PAGE_MASK;
2638
2639 return cpu_buffer->commit_page->page == (void *)addr &&
2640 rb_commit_index(cpu_buffer) == index;
2641}
2642
2643static __always_inline void
2644rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2645 struct ring_buffer_event *event)
2646{
2647 u64 delta;
2648
2649
2650
2651
2652
2653 if (rb_event_is_commit(cpu_buffer, event)) {
2654
2655
2656
2657
2658 if (!rb_event_index(event))
2659 cpu_buffer->write_stamp =
2660 cpu_buffer->commit_page->page->time_stamp;
2661 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2662 delta = ring_buffer_event_time_stamp(event);
2663 cpu_buffer->write_stamp += delta;
2664 } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
2665 delta = ring_buffer_event_time_stamp(event);
2666 cpu_buffer->write_stamp = delta;
2667 } else
2668 cpu_buffer->write_stamp += event->time_delta;
2669 }
2670}
2671
2672static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2673 struct ring_buffer_event *event)
2674{
2675 local_inc(&cpu_buffer->entries);
2676 rb_update_write_stamp(cpu_buffer, event);
2677 rb_end_commit(cpu_buffer);
2678}
2679
2680static __always_inline void
2681rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2682{
2683 size_t nr_pages;
2684 size_t dirty;
2685 size_t full;
2686
2687 if (buffer->irq_work.waiters_pending) {
2688 buffer->irq_work.waiters_pending = false;
2689
2690 irq_work_queue(&buffer->irq_work.work);
2691 }
2692
2693 if (cpu_buffer->irq_work.waiters_pending) {
2694 cpu_buffer->irq_work.waiters_pending = false;
2695
2696 irq_work_queue(&cpu_buffer->irq_work.work);
2697 }
2698
2699 if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
2700 return;
2701
2702 if (cpu_buffer->reader_page == cpu_buffer->commit_page)
2703 return;
2704
2705 if (!cpu_buffer->irq_work.full_waiters_pending)
2706 return;
2707
2708 cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
2709
2710 full = cpu_buffer->shortest_full;
2711 nr_pages = cpu_buffer->nr_pages;
2712 dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
2713 if (full && nr_pages && (dirty * 100) <= full * nr_pages)
2714 return;
2715
2716 cpu_buffer->irq_work.wakeup_full = true;
2717 cpu_buffer->irq_work.full_waiters_pending = false;
2718
2719 irq_work_queue(&cpu_buffer->irq_work.work);
2720}
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
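/*
 * Recursion protection: one context bit each for NMI, IRQ, softirq
 * and normal context, offset by cpu_buffer->nest. If the bit for the
 * current context is already set, a write is already in progress at
 * this level and the caller must back off; otherwise the bit is set
 * and cleared again by trace_recursive_unlock().
 */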
2760static __always_inline int
2761trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2762{
2763 unsigned int val = cpu_buffer->current_context;
2764 unsigned long pc = preempt_count();
2765 int bit;
2766
2767 if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
2768 bit = RB_CTX_NORMAL;
2769 else
2770 bit = pc & NMI_MASK ? RB_CTX_NMI :
2771 pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
2772
2773 if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
2774 return 1;
2775
2776 val |= (1 << (bit + cpu_buffer->nest));
2777 cpu_buffer->current_context = val;
2778
2779 return 0;
2780}
2781
2782static __always_inline void
2783trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2784{
2785 cpu_buffer->current_context &=
2786 cpu_buffer->current_context - (1 << cpu_buffer->nest);
2787}
2788
2789
2790#define NESTED_BITS 4
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
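/**
 * ring_buffer_nest_start - allow one more nested ring buffer write
 * @buffer: The ring buffer to modify
 *
 * Lets an event that is emitted from within another ring buffer write
 * (in the same context) bypass the recursion check by shifting to a
 * second set of context bits. Preemption stays disabled until the
 * matching ring_buffer_nest_end().
 */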
2805void ring_buffer_nest_start(struct trace_buffer *buffer)
2806{
2807 struct ring_buffer_per_cpu *cpu_buffer;
2808 int cpu;
2809
2810
2811 preempt_disable_notrace();
2812 cpu = raw_smp_processor_id();
2813 cpu_buffer = buffer->buffers[cpu];
2814
2815 cpu_buffer->nest += NESTED_BITS;
2816}
2817
2818
2819
2820
2821
2822
2823
2824
2825void ring_buffer_nest_end(struct trace_buffer *buffer)
2826{
2827 struct ring_buffer_per_cpu *cpu_buffer;
2828 int cpu;
2829
2830
2831 cpu = raw_smp_processor_id();
2832 cpu_buffer = buffer->buffers[cpu];
2833
2834 cpu_buffer->nest -= NESTED_BITS;
2835 preempt_enable_notrace();
2836}
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
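/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */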
2847int ring_buffer_unlock_commit(struct trace_buffer *buffer,
2848 struct ring_buffer_event *event)
2849{
2850 struct ring_buffer_per_cpu *cpu_buffer;
2851 int cpu = raw_smp_processor_id();
2852
2853 cpu_buffer = buffer->buffers[cpu];
2854
2855 rb_commit(cpu_buffer, event);
2856
2857 rb_wakeups(buffer, cpu_buffer);
2858
2859 trace_recursive_unlock(cpu_buffer);
2860
2861 preempt_enable_notrace();
2862
2863 return 0;
2864}
2865EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2866
2867static noinline void
2868rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2869 struct rb_event_info *info)
2870{
2871 WARN_ONCE(info->delta > (1ULL << 59),
2872 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2873 (unsigned long long)info->delta,
2874 (unsigned long long)info->ts,
2875 (unsigned long long)cpu_buffer->write_stamp,
2876 sched_clock_stable() ? "" :
2877 "If you just came from a suspend/resume,\n"
2878 "please switch to the trace global clock:\n"
2879 " echo global > /sys/kernel/debug/tracing/trace_clock\n"
2880 "or add trace_clock=global to the kernel command line\n");
2881 info->add_timestamp = 1;
2882}
2883
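/*
 * Reserve space on the tail page by atomically adding info->length to
 * the page's write index. If the reservation runs past the end of the
 * page, rb_move_tail() moves to the next page and may return -EAGAIN
 * so the caller retries with a fresh timestamp.
 */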
2884static struct ring_buffer_event *
2885__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2886 struct rb_event_info *info)
2887{
2888 struct ring_buffer_event *event;
2889 struct buffer_page *tail_page;
2890 unsigned long tail, write;
2891
	/*
	 * If the time delta since the last event is too big to
	 * hold in the time field of the event, then a TIME EXTEND
	 * event is appended ahead of the data event.
	 */
2897 if (unlikely(info->add_timestamp))
2898 info->length += RB_LEN_TIME_EXTEND;
2899
2900
2901 tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
2902 write = local_add_return(info->length, &tail_page->write);
2903
2904
2905 write &= RB_WRITE_MASK;
2906 tail = write - info->length;
2907
2908
2909
2910
2911
2912 if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
2913 info->delta = 0;
2914
2915
2916 if (unlikely(write > BUF_PAGE_SIZE))
2917 return rb_move_tail(cpu_buffer, tail, info);
2918
2919
2920
2921 event = __rb_page_index(tail_page, tail);
2922 rb_update_event(cpu_buffer, event, info);
2923
2924 local_inc(&tail_page->entries);
2925
2926
2927
2928
2929
2930 if (!tail)
2931 tail_page->page->time_stamp = info->ts;
2932
2933
2934 local_add(info->length, &cpu_buffer->entries_bytes);
2935
2936 return event;
2937}
2938
2939static __always_inline struct ring_buffer_event *
2940rb_reserve_next_event(struct trace_buffer *buffer,
2941 struct ring_buffer_per_cpu *cpu_buffer,
2942 unsigned long length)
2943{
2944 struct ring_buffer_event *event;
2945 struct rb_event_info info;
2946 int nr_loops = 0;
2947 u64 diff;
2948
2949 rb_start_commit(cpu_buffer);
2950
2951#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	/*
	 * Due to the ability to swap a cpu buffer from a buffer
	 * it is possible it was swapped before we committed
	 * (committing stops a swap). We check for it here and
	 * if it happened, we have to fail the write.
	 */
2958 barrier();
2959 if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
2960 local_dec(&cpu_buffer->committing);
2961 local_dec(&cpu_buffer->commits);
2962 return NULL;
2963 }
2964#endif
2965
2966 info.length = rb_calculate_event_length(length);
2967 again:
2968 info.add_timestamp = 0;
2969 info.delta = 0;
2970
	/*
	 * We allow for interrupts to reenter here and do a trace.
	 * If one does, it will cause this original code to loop
	 * back here. Even with heavy interrupts happening, this
	 * should only happen a few times in a row. If this happens
	 * 1000 times in a row, there must be either an interrupt
	 * storm or we have something buggy.
	 * Bail!
	 */
2980 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2981 goto out_fail;
2982
2983 info.ts = rb_time_stamp(cpu_buffer->buffer);
2984 diff = info.ts - cpu_buffer->write_stamp;
2985
2986
2987 barrier();
2988
2989 if (ring_buffer_time_stamp_abs(buffer)) {
2990 info.delta = info.ts;
2991 rb_handle_timestamp(cpu_buffer, &info);
2992 } else
2993 if (likely(info.ts >= cpu_buffer->write_stamp)) {
2994 info.delta = diff;
2995 if (unlikely(test_time_stamp(info.delta)))
2996 rb_handle_timestamp(cpu_buffer, &info);
2997 }
2998
2999 event = __rb_reserve_next(cpu_buffer, &info);
3000
3001 if (unlikely(PTR_ERR(event) == -EAGAIN)) {
3002 if (info.add_timestamp)
3003 info.length -= RB_LEN_TIME_EXTEND;
3004 goto again;
3005 }
3006
3007 if (!event)
3008 goto out_fail;
3009
3010 return event;
3011
3012 out_fail:
3013 rb_end_commit(cpu_buffer);
3014 return NULL;
3015}
3016
/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
 * If NULL is returned, then nothing has been allocated or locked.
 */
3032struct ring_buffer_event *
3033ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
3034{
3035 struct ring_buffer_per_cpu *cpu_buffer;
3036 struct ring_buffer_event *event;
3037 int cpu;
3038
3039
3040 preempt_disable_notrace();
3041
3042 if (unlikely(atomic_read(&buffer->record_disabled)))
3043 goto out;
3044
3045 cpu = raw_smp_processor_id();
3046
3047 if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
3048 goto out;
3049
3050 cpu_buffer = buffer->buffers[cpu];
3051
3052 if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
3053 goto out;
3054
3055 if (unlikely(length > BUF_MAX_DATA_SIZE))
3056 goto out;
3057
3058 if (unlikely(trace_recursive_lock(cpu_buffer)))
3059 goto out;
3060
3061 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3062 if (!event)
3063 goto out_unlock;
3064
3065 return event;
3066
3067 out_unlock:
3068 trace_recursive_unlock(cpu_buffer);
3069 out:
3070 preempt_enable_notrace();
3071 return NULL;
3072}
3073EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
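
/*
 * Illustrative sketch (not part of the original source): the expected
 * reserve/commit pairing. "struct my_entry" and its field are hypothetical.
 *
 *	struct ring_buffer_event *event;
 *	struct my_entry *entry;
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
 *	if (!event)
 *		return;		(buffer full, disabled, or recursion detected)
 *	entry = ring_buffer_event_data(event);
 *	entry->value = 42;
 *	ring_buffer_unlock_commit(buffer, event);
 */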
3074
/*
 * Decrement the entries to the page that an event is on.
 * The event does not even need to exist, only the pointer
 * to the page it is on. This may only be called before the commit
 * takes place.
 */
3081static inline void
3082rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
3083 struct ring_buffer_event *event)
3084{
3085 unsigned long addr = (unsigned long)event;
3086 struct buffer_page *bpage = cpu_buffer->commit_page;
3087 struct buffer_page *start;
3088
3089 addr &= PAGE_MASK;
3090
3091
3092 if (likely(bpage->page == (void *)addr)) {
3093 local_dec(&bpage->entries);
3094 return;
3095 }
3096
	/*
	 * Because the commit page may be on the reader page we
	 * start with the next page and check the end loop there.
	 */
3101 rb_inc_page(cpu_buffer, &bpage);
3102 start = bpage;
3103 do {
3104 if (bpage->page == (void *)addr) {
3105 local_dec(&bpage->entries);
3106 return;
3107 }
3108 rb_inc_page(cpu_buffer, &bpage);
3109 } while (bpage != start);
3110
3111
3112 RB_WARN_ON(cpu_buffer, 1);
3113}
3114
/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * Sometimes an event that is in the ring buffer needs to be ignored.
 * This function lets the user discard an event in the ring buffer
 * and then that event will not be read later.
 *
 * This function only works if it is called before the item has been
 * committed. It will try to free the event from the ring buffer
 * if another event has not been added behind it.
 *
 * If another event has been added behind it, it will set the event
 * up as discarded, and perform the commit.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */
3134void ring_buffer_discard_commit(struct trace_buffer *buffer,
3135 struct ring_buffer_event *event)
3136{
3137 struct ring_buffer_per_cpu *cpu_buffer;
3138 int cpu;
3139
3140
3141 rb_event_discard(event);
3142
3143 cpu = smp_processor_id();
3144 cpu_buffer = buffer->buffers[cpu];
3145
	/*
	 * This must only be called if the event has not been
	 * committed yet. Thus we can assume that preemption
	 * is still disabled.
	 */
3151 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
3152
3153 rb_decrement_entry(cpu_buffer, event);
3154 if (rb_try_to_discard(cpu_buffer, event))
3155 goto out;
3156
3157
3158
3159
3160
3161 rb_update_write_stamp(cpu_buffer, event);
3162 out:
3163 rb_end_commit(cpu_buffer);
3164
3165 trace_recursive_unlock(cpu_buffer);
3166
3167 preempt_enable_notrace();
3168
3169}
3170EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
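
/*
 * Illustrative sketch (not part of the original source): discarding a
 * reserved event instead of committing it, e.g. after a post-reserve
 * filter check. fill_entry() and filter_match() are hypothetical helpers.
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	fill_entry(entry);
 *	if (!filter_match(entry))
 *		ring_buffer_discard_commit(buffer, event);
 *	else
 *		ring_buffer_unlock_commit(buffer, event);
 */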
3171
/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 *
 * Note, like ring_buffer_lock_reserve, the length is the length of the data
 * and not the actual length of the event that contains the data.
 */
3185int ring_buffer_write(struct trace_buffer *buffer,
3186 unsigned long length,
3187 void *data)
3188{
3189 struct ring_buffer_per_cpu *cpu_buffer;
3190 struct ring_buffer_event *event;
3191 void *body;
3192 int ret = -EBUSY;
3193 int cpu;
3194
3195 preempt_disable_notrace();
3196
3197 if (atomic_read(&buffer->record_disabled))
3198 goto out;
3199
3200 cpu = raw_smp_processor_id();
3201
3202 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3203 goto out;
3204
3205 cpu_buffer = buffer->buffers[cpu];
3206
3207 if (atomic_read(&cpu_buffer->record_disabled))
3208 goto out;
3209
3210 if (length > BUF_MAX_DATA_SIZE)
3211 goto out;
3212
3213 if (unlikely(trace_recursive_lock(cpu_buffer)))
3214 goto out;
3215
3216 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3217 if (!event)
3218 goto out_unlock;
3219
3220 body = rb_event_data(event);
3221
3222 memcpy(body, data, length);
3223
3224 rb_commit(cpu_buffer, event);
3225
3226 rb_wakeups(buffer, cpu_buffer);
3227
3228 ret = 0;
3229
3230 out_unlock:
3231 trace_recursive_unlock(cpu_buffer);
3232
3233 out:
3234 preempt_enable_notrace();
3235
3236 return ret;
3237}
3238EXPORT_SYMBOL_GPL(ring_buffer_write);
3239
3240static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3241{
3242 struct buffer_page *reader = cpu_buffer->reader_page;
3243 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3244 struct buffer_page *commit = cpu_buffer->commit_page;
3245
3246
3247 if (unlikely(!head))
3248 return true;
3249
3250 return reader->read == rb_page_commit(reader) &&
3251 (commit == reader ||
3252 (commit == head &&
3253 head->read == rb_page_commit(commit)));
3254}
3255
/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_rcu() after this.
 */
3265void ring_buffer_record_disable(struct trace_buffer *buffer)
3266{
3267 atomic_inc(&buffer->record_disabled);
3268}
3269EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3270
/**
 * ring_buffer_record_enable - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
3278void ring_buffer_record_enable(struct trace_buffer *buffer)
3279{
3280 atomic_dec(&buffer->record_disabled);
3281}
3282EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3283
/**
 * ring_buffer_record_off - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * This is different than ring_buffer_record_disable() as
 * it works like an on/off switch, where as the disable() version
 * must be paired with an enable().
 */
3295void ring_buffer_record_off(struct trace_buffer *buffer)
3296{
3297 unsigned int rd;
3298 unsigned int new_rd;
3299
3300 do {
3301 rd = atomic_read(&buffer->record_disabled);
3302 new_rd = rd | RB_BUFFER_OFF;
3303 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3304}
3305EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3306
/**
 * ring_buffer_record_on - restart writes into the buffer
 * @buffer: The ring buffer to start writes to.
 *
 * This enables all writes to the buffer that were disabled by
 * ring_buffer_record_off().
 *
 * This is different than ring_buffer_record_enable() as
 * it works like an on/off switch, where as the enable() version
 * must be paired with a disable().
 */
3318void ring_buffer_record_on(struct trace_buffer *buffer)
3319{
3320 unsigned int rd;
3321 unsigned int new_rd;
3322
3323 do {
3324 rd = atomic_read(&buffer->record_disabled);
3325 new_rd = rd & ~RB_BUFFER_OFF;
3326 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3327}
3328EXPORT_SYMBOL_GPL(ring_buffer_record_on);
3329
/**
 * ring_buffer_record_is_on - return true if the ring buffer can write
 * @buffer: The ring buffer to see if write is enabled
 *
 * Returns true if the ring buffer is in a state that it accepts writes.
 */
3336bool ring_buffer_record_is_on(struct trace_buffer *buffer)
3337{
3338 return !atomic_read(&buffer->record_disabled);
3339}
3340
/**
 * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
 * @buffer: The ring buffer to see if write is set enabled
 *
 * Returns true if the ring buffer is set writable by ring_buffer_record_on().
 * Note that this does NOT mean it is in a writable state.
 *
 * It may return true when the ring buffer has been disabled by
 * ring_buffer_record_disable(), as that is a temporary disabling of
 * the ring buffer.
 */
3352bool ring_buffer_record_is_set_on(struct trace_buffer *buffer)
3353{
3354 return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
3355}
3356
/**
 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
 * @buffer: The ring buffer to stop writes to.
 * @cpu: The CPU buffer to stop
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_rcu() after this.
 */
3367void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu)
3368{
3369 struct ring_buffer_per_cpu *cpu_buffer;
3370
3371 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3372 return;
3373
3374 cpu_buffer = buffer->buffers[cpu];
3375 atomic_inc(&cpu_buffer->record_disabled);
3376}
3377EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
3378
/**
 * ring_buffer_record_enable_cpu - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 * @cpu: The CPU to enable.
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
3387void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu)
3388{
3389 struct ring_buffer_per_cpu *cpu_buffer;
3390
3391 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3392 return;
3393
3394 cpu_buffer = buffer->buffers[cpu];
3395 atomic_dec(&cpu_buffer->record_disabled);
3396}
3397EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
3398
/*
 * The total number of entries in the ring buffer is the running counter
 * of entries entered into the ring buffer, minus the sum of
 * the entries read from the ring buffer and the number of
 * entries that were overwritten.
 */
3405static inline unsigned long
3406rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3407{
3408 return local_read(&cpu_buffer->entries) -
3409 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3410}
3411
/**
 * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
3417u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
3418{
3419 unsigned long flags;
3420 struct ring_buffer_per_cpu *cpu_buffer;
3421 struct buffer_page *bpage;
3422 u64 ret = 0;
3423
3424 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3425 return 0;
3426
3427 cpu_buffer = buffer->buffers[cpu];
3428 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3429
3430
3431
3432
3433 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3434 bpage = cpu_buffer->reader_page;
3435 else
3436 bpage = rb_set_head_page(cpu_buffer);
3437 if (bpage)
3438 ret = bpage->page->time_stamp;
3439 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3440
3441 return ret;
3442}
3443EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3444
/**
 * ring_buffer_bytes_cpu - get the number of bytes unread in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
3450unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu)
3451{
3452 struct ring_buffer_per_cpu *cpu_buffer;
3453 unsigned long ret;
3454
3455 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3456 return 0;
3457
3458 cpu_buffer = buffer->buffers[cpu];
3459 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3460
3461 return ret;
3462}
3463EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3464
/**
 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the entries from.
 */
3470unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu)
3471{
3472 struct ring_buffer_per_cpu *cpu_buffer;
3473
3474 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3475 return 0;
3476
3477 cpu_buffer = buffer->buffers[cpu];
3478
3479 return rb_num_of_entries(cpu_buffer);
3480}
3481EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3482
/**
 * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
 * buffer wrapping around (only if RB_FL_OVERWRITE is on).
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
3489unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu)
3490{
3491 struct ring_buffer_per_cpu *cpu_buffer;
3492 unsigned long ret;
3493
3494 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3495 return 0;
3496
3497 cpu_buffer = buffer->buffers[cpu];
3498 ret = local_read(&cpu_buffer->overrun);
3499
3500 return ret;
3501}
3502EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3503
/**
 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
 * commits failing due to the buffer wrapping around while there are uncommitted
 * events, such as during an interrupt storm.
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
3511unsigned long
3512ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu)
3513{
3514 struct ring_buffer_per_cpu *cpu_buffer;
3515 unsigned long ret;
3516
3517 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3518 return 0;
3519
3520 cpu_buffer = buffer->buffers[cpu];
3521 ret = local_read(&cpu_buffer->commit_overrun);
3522
3523 return ret;
3524}
3525EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3526
/**
 * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
 * the ring buffer filling up (only if RB_FL_OVERWRITE is off).
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of dropped events from
 */
3533unsigned long
3534ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu)
3535{
3536 struct ring_buffer_per_cpu *cpu_buffer;
3537 unsigned long ret;
3538
3539 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3540 return 0;
3541
3542 cpu_buffer = buffer->buffers[cpu];
3543 ret = local_read(&cpu_buffer->dropped_events);
3544
3545 return ret;
3546}
3547EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3548
/**
 * ring_buffer_read_events_cpu - get the number of events successfully read
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of events read
 */
3554unsigned long
3555ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu)
3556{
3557 struct ring_buffer_per_cpu *cpu_buffer;
3558
3559 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3560 return 0;
3561
3562 cpu_buffer = buffer->buffers[cpu];
3563 return cpu_buffer->read;
3564}
3565EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3566
/**
 * ring_buffer_entries - get the number of entries in a buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of entries in the ring buffer
 * (all CPU entries)
 */
3574unsigned long ring_buffer_entries(struct trace_buffer *buffer)
3575{
3576 struct ring_buffer_per_cpu *cpu_buffer;
3577 unsigned long entries = 0;
3578 int cpu;
3579
3580
3581 for_each_buffer_cpu(buffer, cpu) {
3582 cpu_buffer = buffer->buffers[cpu];
3583 entries += rb_num_of_entries(cpu_buffer);
3584 }
3585
3586 return entries;
3587}
3588EXPORT_SYMBOL_GPL(ring_buffer_entries);
3589
/**
 * ring_buffer_overruns - get the number of overruns in buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of overruns in the ring buffer
 * (all CPU entries)
 */
3597unsigned long ring_buffer_overruns(struct trace_buffer *buffer)
3598{
3599 struct ring_buffer_per_cpu *cpu_buffer;
3600 unsigned long overruns = 0;
3601 int cpu;
3602
3603
3604 for_each_buffer_cpu(buffer, cpu) {
3605 cpu_buffer = buffer->buffers[cpu];
3606 overruns += local_read(&cpu_buffer->overrun);
3607 }
3608
3609 return overruns;
3610}
3611EXPORT_SYMBOL_GPL(ring_buffer_overruns);
3612
3613static void rb_iter_reset(struct ring_buffer_iter *iter)
3614{
3615 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3616
3617
3618 iter->head_page = cpu_buffer->reader_page;
3619 iter->head = cpu_buffer->reader_page->read;
3620 iter->next_event = iter->head;
3621
3622 iter->cache_reader_page = iter->head_page;
3623 iter->cache_read = cpu_buffer->read;
3624
3625 if (iter->head) {
3626 iter->read_stamp = cpu_buffer->read_stamp;
3627 iter->page_stamp = cpu_buffer->reader_page->page->time_stamp;
3628 } else {
3629 iter->read_stamp = iter->head_page->page->time_stamp;
3630 iter->page_stamp = iter->read_stamp;
3631 }
3632}
3633
/**
 * ring_buffer_iter_reset - reset an iterator
 * @iter: The iterator to reset
 *
 * Resets the iterator, so that it will start from the beginning
 * again.
 */
3641void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3642{
3643 struct ring_buffer_per_cpu *cpu_buffer;
3644 unsigned long flags;
3645
3646 if (!iter)
3647 return;
3648
3649 cpu_buffer = iter->cpu_buffer;
3650
3651 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3652 rb_iter_reset(iter);
3653 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3654}
3655EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3656
/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 */
3661int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3662{
3663 struct ring_buffer_per_cpu *cpu_buffer;
3664 struct buffer_page *reader;
3665 struct buffer_page *head_page;
3666 struct buffer_page *commit_page;
3667 struct buffer_page *curr_commit_page;
3668 unsigned commit;
3669 u64 curr_commit_ts;
3670 u64 commit_ts;
3671
3672 cpu_buffer = iter->cpu_buffer;
3673 reader = cpu_buffer->reader_page;
3674 head_page = cpu_buffer->head_page;
3675 commit_page = cpu_buffer->commit_page;
3676 commit_ts = commit_page->page->time_stamp;
3677
3678
3679
3680
3681
3682
3683 smp_rmb();
3684 commit = rb_page_commit(commit_page);
3685
3686 smp_rmb();
3687
3688
3689 curr_commit_page = READ_ONCE(cpu_buffer->commit_page);
3690 curr_commit_ts = READ_ONCE(curr_commit_page->page->time_stamp);
3691
3692
3693 if (curr_commit_page != commit_page ||
3694 curr_commit_ts != commit_ts)
3695 return 0;
3696
3697
3698 return ((iter->head_page == commit_page && iter->head >= commit) ||
3699 (iter->head_page == reader && commit_page == head_page &&
3700 head_page->read == commit &&
3701 iter->head == rb_page_commit(cpu_buffer->reader_page)));
3702}
3703EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3704
3705static void
3706rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3707 struct ring_buffer_event *event)
3708{
3709 u64 delta;
3710
3711 switch (event->type_len) {
3712 case RINGBUF_TYPE_PADDING:
3713 return;
3714
3715 case RINGBUF_TYPE_TIME_EXTEND:
3716 delta = ring_buffer_event_time_stamp(event);
3717 cpu_buffer->read_stamp += delta;
3718 return;
3719
3720 case RINGBUF_TYPE_TIME_STAMP:
3721 delta = ring_buffer_event_time_stamp(event);
3722 cpu_buffer->read_stamp = delta;
3723 return;
3724
3725 case RINGBUF_TYPE_DATA:
3726 cpu_buffer->read_stamp += event->time_delta;
3727 return;
3728
3729 default:
3730 RB_WARN_ON(cpu_buffer, 1);
3731 }
3732 return;
3733}
3734
3735static void
3736rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3737 struct ring_buffer_event *event)
3738{
3739 u64 delta;
3740
3741 switch (event->type_len) {
3742 case RINGBUF_TYPE_PADDING:
3743 return;
3744
3745 case RINGBUF_TYPE_TIME_EXTEND:
3746 delta = ring_buffer_event_time_stamp(event);
3747 iter->read_stamp += delta;
3748 return;
3749
3750 case RINGBUF_TYPE_TIME_STAMP:
3751 delta = ring_buffer_event_time_stamp(event);
3752 iter->read_stamp = delta;
3753 return;
3754
3755 case RINGBUF_TYPE_DATA:
3756 iter->read_stamp += event->time_delta;
3757 return;
3758
3759 default:
3760 RB_WARN_ON(iter->cpu_buffer, 1);
3761 }
3762 return;
3763}
3764
3765static struct buffer_page *
3766rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3767{
3768 struct buffer_page *reader = NULL;
3769 unsigned long overwrite;
3770 unsigned long flags;
3771 int nr_loops = 0;
3772 int ret;
3773
3774 local_irq_save(flags);
3775 arch_spin_lock(&cpu_buffer->lock);
3776
3777 again:
	/*
	 * This should normally only loop twice. But because the
	 * start of the reader inserts an empty page, it causes
	 * a case where we will loop three times. There should be no
	 * reason to loop four times (that I know of).
	 */
3784 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3785 reader = NULL;
3786 goto out;
3787 }
3788
3789 reader = cpu_buffer->reader_page;
3790
3791
3792 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3793 goto out;
3794
3795
3796 if (RB_WARN_ON(cpu_buffer,
3797 cpu_buffer->reader_page->read > rb_page_size(reader)))
3798 goto out;
3799
3800
3801 reader = NULL;
3802 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3803 goto out;
3804
3805
3806 if (rb_num_of_entries(cpu_buffer) == 0)
3807 goto out;
3808
3809
3810
3811
3812 local_set(&cpu_buffer->reader_page->write, 0);
3813 local_set(&cpu_buffer->reader_page->entries, 0);
3814 local_set(&cpu_buffer->reader_page->page->commit, 0);
3815 cpu_buffer->reader_page->real_end = 0;
3816
3817 spin:
3818
3819
3820
3821 reader = rb_set_head_page(cpu_buffer);
3822 if (!reader)
3823 goto out;
3824 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3825 cpu_buffer->reader_page->list.prev = reader->list.prev;
3826
3827
3828
3829
3830
3831
3832 cpu_buffer->pages = reader->list.prev;
3833
3834
3835 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846 smp_mb();
3847 overwrite = local_read(&(cpu_buffer->overrun));
3848
	/*
	 * Here's the tricky part.
	 *
	 * We need to move the pointer past the header page.
	 * But we can only do that if a writer is not currently
	 * moving it. The page before the header page has the
	 * flag bit '1' set if it is pointing to the page we want.
	 * But if the writer is in the process of moving it
	 * then it will be '2' or already moved '0'.
	 */
3860 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3861
3862
3863
3864
3865 if (!ret)
3866 goto spin;
3867
3868
3869
3870
3871
3872
3873 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3874 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3875
3876 local_inc(&cpu_buffer->pages_read);
3877
3878
3879 cpu_buffer->reader_page = reader;
3880 cpu_buffer->reader_page->read = 0;
3881
3882 if (overwrite != cpu_buffer->last_overrun) {
3883 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3884 cpu_buffer->last_overrun = overwrite;
3885 }
3886
3887 goto again;
3888
3889 out:
3890
3891 if (reader && reader->read == 0)
3892 cpu_buffer->read_stamp = reader->page->time_stamp;
3893
3894 arch_spin_unlock(&cpu_buffer->lock);
3895 local_irq_restore(flags);
3896
3897 return reader;
3898}
3899
3900static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3901{
3902 struct ring_buffer_event *event;
3903 struct buffer_page *reader;
3904 unsigned length;
3905
3906 reader = rb_get_reader_page(cpu_buffer);
3907
3908
3909 if (RB_WARN_ON(cpu_buffer, !reader))
3910 return;
3911
3912 event = rb_reader_event(cpu_buffer);
3913
3914 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3915 cpu_buffer->read++;
3916
3917 rb_update_read_stamp(cpu_buffer, event);
3918
3919 length = rb_event_length(event);
3920 cpu_buffer->reader_page->read += length;
3921}
3922
3923static void rb_advance_iter(struct ring_buffer_iter *iter)
3924{
3925 struct ring_buffer_per_cpu *cpu_buffer;
3926
3927 cpu_buffer = iter->cpu_buffer;
3928
3929
3930 if (iter->head == iter->next_event) {
3931
3932 if (rb_iter_head_event(iter) == NULL)
3933 return;
3934 }
3935
3936 iter->head = iter->next_event;
3937
3938
3939
3940
3941 if (iter->next_event >= rb_page_size(iter->head_page)) {
3942
3943 if (iter->head_page == cpu_buffer->commit_page)
3944 return;
3945 rb_inc_iter(iter);
3946 return;
3947 }
3948
3949 rb_update_iter_read_stamp(iter, iter->event);
3950}
3951
3952static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3953{
3954 return cpu_buffer->lost_events;
3955}
3956
3957static struct ring_buffer_event *
3958rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3959 unsigned long *lost_events)
3960{
3961 struct ring_buffer_event *event;
3962 struct buffer_page *reader;
3963 int nr_loops = 0;
3964
3965 if (ts)
3966 *ts = 0;
3967 again:
	/*
	 * We repeat when a time extend is encountered.
	 * Since the time extend is always attached to a data event,
	 * we should never loop more than once.
	 * (We never hit the following condition more than twice).
	 */
3974 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3975 return NULL;
3976
3977 reader = rb_get_reader_page(cpu_buffer);
3978 if (!reader)
3979 return NULL;
3980
3981 event = rb_reader_event(cpu_buffer);
3982
3983 switch (event->type_len) {
3984 case RINGBUF_TYPE_PADDING:
3985 if (rb_null_event(event))
3986 RB_WARN_ON(cpu_buffer, 1);
		/*
		 * Because the writer could be discarding every
		 * event it creates (which would probably be bad)
		 * if we were to go back to "again" then we may never
		 * catch up, and will trigger the warn on, or lock
		 * the box. Return the padding, and we will release
		 * the current locks, and try again.
		 */
3995 return event;
3996
3997 case RINGBUF_TYPE_TIME_EXTEND:
3998
3999 rb_advance_reader(cpu_buffer);
4000 goto again;
4001
4002 case RINGBUF_TYPE_TIME_STAMP:
4003 if (ts) {
4004 *ts = ring_buffer_event_time_stamp(event);
4005 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4006 cpu_buffer->cpu, ts);
4007 }
4008
4009 rb_advance_reader(cpu_buffer);
4010 goto again;
4011
4012 case RINGBUF_TYPE_DATA:
4013 if (ts && !(*ts)) {
4014 *ts = cpu_buffer->read_stamp + event->time_delta;
4015 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4016 cpu_buffer->cpu, ts);
4017 }
4018 if (lost_events)
4019 *lost_events = rb_lost_events(cpu_buffer);
4020 return event;
4021
4022 default:
4023 RB_WARN_ON(cpu_buffer, 1);
4024 }
4025
4026 return NULL;
4027}
4028EXPORT_SYMBOL_GPL(ring_buffer_peek);
4029
4030static struct ring_buffer_event *
4031rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4032{
4033 struct trace_buffer *buffer;
4034 struct ring_buffer_per_cpu *cpu_buffer;
4035 struct ring_buffer_event *event;
4036 int nr_loops = 0;
4037
4038 if (ts)
4039 *ts = 0;
4040
4041 cpu_buffer = iter->cpu_buffer;
4042 buffer = cpu_buffer->buffer;
4043
	/*
	 * Check if someone performed a consuming read to
	 * the buffer. A consuming read invalidates the iterator
	 * and we need to reset the iterator in this case.
	 */
4049 if (unlikely(iter->cache_read != cpu_buffer->read ||
4050 iter->cache_reader_page != cpu_buffer->reader_page))
4051 rb_iter_reset(iter);
4052
4053 again:
4054 if (ring_buffer_iter_empty(iter))
4055 return NULL;
4056
	/*
	 * As the writer can mess with what the iterator is trying
	 * to read, just give up if we fail to get an event after
	 * three tries. The iterator is not as reliable when reading
	 * the ring buffer with an active write as the consumer is.
	 * Do not warn if the three failures is reached.
	 */
4064 if (++nr_loops > 3)
4065 return NULL;
4066
4067 if (rb_per_cpu_empty(cpu_buffer))
4068 return NULL;
4069
4070 if (iter->head >= rb_page_size(iter->head_page)) {
4071 rb_inc_iter(iter);
4072 goto again;
4073 }
4074
4075 event = rb_iter_head_event(iter);
4076 if (!event)
4077 goto again;
4078
4079 switch (event->type_len) {
4080 case RINGBUF_TYPE_PADDING:
4081 if (rb_null_event(event)) {
4082 rb_inc_iter(iter);
4083 goto again;
4084 }
4085 rb_advance_iter(iter);
4086 return event;
4087
4088 case RINGBUF_TYPE_TIME_EXTEND:
4089
4090 rb_advance_iter(iter);
4091 goto again;
4092
4093 case RINGBUF_TYPE_TIME_STAMP:
4094 if (ts) {
4095 *ts = ring_buffer_event_time_stamp(event);
4096 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4097 cpu_buffer->cpu, ts);
4098 }
4099
4100 rb_advance_iter(iter);
4101 goto again;
4102
4103 case RINGBUF_TYPE_DATA:
4104 if (ts && !(*ts)) {
4105 *ts = iter->read_stamp + event->time_delta;
4106 ring_buffer_normalize_time_stamp(buffer,
4107 cpu_buffer->cpu, ts);
4108 }
4109 return event;
4110
4111 default:
4112 RB_WARN_ON(cpu_buffer, 1);
4113 }
4114
4115 return NULL;
4116}
4117EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
4118
4119static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
4120{
4121 if (likely(!in_nmi())) {
4122 raw_spin_lock(&cpu_buffer->reader_lock);
4123 return true;
4124 }
4125
	/*
	 * If an NMI die dumps out the content of the ring buffer
	 * trylock must be used to prevent a deadlock if the NMI
	 * preempted a task that holds the ring buffer locks. If
	 * we get the lock then all is fine, if not, then continue
	 * to do the read, but this can corrupt the ring buffer,
	 * so it must be permanently disabled from future writes.
	 * Reading from NMI is a oneshot deal.
	 */
4135 if (raw_spin_trylock(&cpu_buffer->reader_lock))
4136 return true;
4137
4138
4139 atomic_inc(&cpu_buffer->record_disabled);
4140 return false;
4141}
4142
4143static inline void
4144rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
4145{
4146 if (likely(locked))
4147 raw_spin_unlock(&cpu_buffer->reader_lock);
4148 return;
4149}
4150
/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
4161struct ring_buffer_event *
4162ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts,
4163 unsigned long *lost_events)
4164{
4165 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4166 struct ring_buffer_event *event;
4167 unsigned long flags;
4168 bool dolock;
4169
4170 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4171 return NULL;
4172
4173 again:
4174 local_irq_save(flags);
4175 dolock = rb_reader_lock(cpu_buffer);
4176 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4177 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4178 rb_advance_reader(cpu_buffer);
4179 rb_reader_unlock(cpu_buffer, dolock);
4180 local_irq_restore(flags);
4181
4182 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4183 goto again;
4184
4185 return event;
4186}
4187
/**
 * ring_buffer_iter_dropped - report if there were dropped events
 * @iter: The ring buffer iterator
 *
 * Returns true if there were dropped events since the last peek.
 */
4193bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter)
4194{
4195 bool ret = iter->missed_events != 0;
4196
4197 iter->missed_events = 0;
4198 return ret;
4199}
4200EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped);
4201
/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not increment the iterator.
 */
4210struct ring_buffer_event *
4211ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4212{
4213 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4214 struct ring_buffer_event *event;
4215 unsigned long flags;
4216
4217 again:
4218 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4219 event = rb_iter_peek(iter, ts);
4220 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4221
4222 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4223 goto again;
4224
4225 return event;
4226}
4227
/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */
4239struct ring_buffer_event *
4240ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
4241 unsigned long *lost_events)
4242{
4243 struct ring_buffer_per_cpu *cpu_buffer;
4244 struct ring_buffer_event *event = NULL;
4245 unsigned long flags;
4246 bool dolock;
4247
4248 again:
4249
4250 preempt_disable();
4251
4252 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4253 goto out;
4254
4255 cpu_buffer = buffer->buffers[cpu];
4256 local_irq_save(flags);
4257 dolock = rb_reader_lock(cpu_buffer);
4258
4259 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4260 if (event) {
4261 cpu_buffer->lost_events = 0;
4262 rb_advance_reader(cpu_buffer);
4263 }
4264
4265 rb_reader_unlock(cpu_buffer, dolock);
4266 local_irq_restore(flags);
4267
4268 out:
4269 preempt_enable();
4270
4271 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4272 goto again;
4273
4274 return event;
4275}
4276EXPORT_SYMBOL_GPL(ring_buffer_consume);
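
/*
 * Illustrative sketch (not part of the original source): draining one CPU
 * buffer with the consuming read interface. process_event() is a
 * hypothetical callback.
 *
 *	struct ring_buffer_event *event;
 *	unsigned long lost;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
 *		if (lost)
 *			pr_warn("lost %lu events\n", lost);
 *		process_event(ring_buffer_event_data(event), ts);
 *	}
 */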
4277
/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 * @flags: gfp flags to use for memory allocation
 *
 * This performs the initial preparations necessary to iterate
 * through the buffer.  Memory is allocated, buffer recording
 * is disabled, and the iterator pointer is returned to the caller.
 *
 * Disabling buffer recording prevents the reading from being
 * corrupted. This is not a consuming read, so a producer is not
 * expected.
 *
 * After a sequence of ring_buffer_read_prepare calls, the user is
 * expected to make at least one call to ring_buffer_read_prepare_sync.
 * Afterwards, ring_buffer_read_start is invoked to get things going
 * for real.
 *
 * This overall must be paired with ring_buffer_read_finish.
 */
4299struct ring_buffer_iter *
4300ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
4301{
4302 struct ring_buffer_per_cpu *cpu_buffer;
4303 struct ring_buffer_iter *iter;
4304
4305 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4306 return NULL;
4307
4308 iter = kzalloc(sizeof(*iter), flags);
4309 if (!iter)
4310 return NULL;
4311
4312 iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
4313 if (!iter->event) {
4314 kfree(iter);
4315 return NULL;
4316 }
4317
4318 cpu_buffer = buffer->buffers[cpu];
4319
4320 iter->cpu_buffer = cpu_buffer;
4321
4322 atomic_inc(&cpu_buffer->resize_disabled);
4323
4324 return iter;
4325}
4326EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
4327
/**
 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
 *
 * All previously invoked ring_buffer_read_prepare calls to prepare
 * iterators will be synchronized.  Afterwards, ring_buffer_read_start
 * calls on those iterators are allowed.
 */
4335void
4336ring_buffer_read_prepare_sync(void)
4337{
4338 synchronize_rcu();
4339}
4340EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4341
/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @iter: The iterator returned by ring_buffer_read_prepare
 *
 * This finalizes the startup of an iteration through the buffer.
 * The iterator comes from a call to ring_buffer_read_prepare and
 * an intervening ring_buffer_read_prepare_sync must have been
 * performed.
 *
 * Must be paired with ring_buffer_read_finish.
 */
4353void
4354ring_buffer_read_start(struct ring_buffer_iter *iter)
4355{
4356 struct ring_buffer_per_cpu *cpu_buffer;
4357 unsigned long flags;
4358
4359 if (!iter)
4360 return;
4361
4362 cpu_buffer = iter->cpu_buffer;
4363
4364 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4365 arch_spin_lock(&cpu_buffer->lock);
4366 rb_iter_reset(iter);
4367 arch_spin_unlock(&cpu_buffer->lock);
4368 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4369}
4370EXPORT_SYMBOL_GPL(ring_buffer_read_start);
4371
/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_read_prepare
 *
 * This re-enables resizing of the buffer, and frees the iterator.
 */
4379void
4380ring_buffer_read_finish(struct ring_buffer_iter *iter)
4381{
4382 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4383 unsigned long flags;
4384
	/*
	 * Ring buffer is disabled from recording, here's a good place
	 * to check the integrity of the ring buffer.
	 * Must prevent readers from trying to read, as the check
	 * clears the HEAD page and readers require it.
	 */
4391 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4392 rb_check_pages(cpu_buffer);
4393 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4394
4395 atomic_dec(&cpu_buffer->resize_disabled);
4396 kfree(iter->event);
4397 kfree(iter);
4398}
4399EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
4400
/**
 * ring_buffer_iter_advance - advance the iterator to the next location
 * @iter: The ring buffer iterator
 *
 * Move the location of the iterator such that the next read will
 * be the next location of the iterator.
 */
4408void ring_buffer_iter_advance(struct ring_buffer_iter *iter)
4409{
4410 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4411 unsigned long flags;
4412
4413 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4414
4415 rb_advance_iter(iter);
4416
4417 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4418}
4419EXPORT_SYMBOL_GPL(ring_buffer_iter_advance);
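
/*
 * Illustrative sketch (not part of the original source): a non consuming
 * read of one CPU buffer using the iterator interface. process_event()
 * is a hypothetical callback.
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_prepare(buffer, cpu, GFP_KERNEL);
 *	if (!iter)
 *		return;
 *	ring_buffer_read_prepare_sync();
 *	ring_buffer_read_start(iter);
 *	while ((event = ring_buffer_iter_peek(iter, &ts))) {
 *		process_event(ring_buffer_event_data(event), ts);
 *		ring_buffer_iter_advance(iter);
 *	}
 *	ring_buffer_read_finish(iter);
 */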
4420
/**
 * ring_buffer_size - return the size of the ring buffer (in bytes)
 * @buffer: The ring buffer.
 * @cpu: The CPU to get ring buffer size from.
 */
4426unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
4427{
	/*
	 * Earlier, this method returned
	 *	BUF_PAGE_SIZE * buffer->nr_pages
	 * Since the nr_pages field is now removed, we have converted this to
	 * return the per cpu buffer value.
	 */
4434 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4435 return 0;
4436
4437 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
4438}
4439EXPORT_SYMBOL_GPL(ring_buffer_size);
4440
4441static void
4442rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
4443{
4444 rb_head_page_deactivate(cpu_buffer);
4445
4446 cpu_buffer->head_page
4447 = list_entry(cpu_buffer->pages, struct buffer_page, list);
4448 local_set(&cpu_buffer->head_page->write, 0);
4449 local_set(&cpu_buffer->head_page->entries, 0);
4450 local_set(&cpu_buffer->head_page->page->commit, 0);
4451
4452 cpu_buffer->head_page->read = 0;
4453
4454 cpu_buffer->tail_page = cpu_buffer->head_page;
4455 cpu_buffer->commit_page = cpu_buffer->head_page;
4456
4457 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
4458 INIT_LIST_HEAD(&cpu_buffer->new_pages);
4459 local_set(&cpu_buffer->reader_page->write, 0);
4460 local_set(&cpu_buffer->reader_page->entries, 0);
4461 local_set(&cpu_buffer->reader_page->page->commit, 0);
4462 cpu_buffer->reader_page->read = 0;
4463
4464 local_set(&cpu_buffer->entries_bytes, 0);
4465 local_set(&cpu_buffer->overrun, 0);
4466 local_set(&cpu_buffer->commit_overrun, 0);
4467 local_set(&cpu_buffer->dropped_events, 0);
4468 local_set(&cpu_buffer->entries, 0);
4469 local_set(&cpu_buffer->committing, 0);
4470 local_set(&cpu_buffer->commits, 0);
4471 local_set(&cpu_buffer->pages_touched, 0);
4472 local_set(&cpu_buffer->pages_read, 0);
4473 cpu_buffer->last_pages_touch = 0;
4474 cpu_buffer->shortest_full = 0;
4475 cpu_buffer->read = 0;
4476 cpu_buffer->read_bytes = 0;
4477
4478 cpu_buffer->write_stamp = 0;
4479 cpu_buffer->read_stamp = 0;
4480
4481 cpu_buffer->lost_events = 0;
4482 cpu_buffer->last_overrun = 0;
4483
4484 rb_head_page_activate(cpu_buffer);
4485}
4486
/**
 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 * @cpu: The CPU buffer to be reset
 */
4492void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
4493{
4494 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4495 unsigned long flags;
4496
4497 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4498 return;
4499
4500 atomic_inc(&cpu_buffer->resize_disabled);
4501 atomic_inc(&cpu_buffer->record_disabled);
4502
4503
4504 synchronize_rcu();
4505
4506 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4507
4508 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
4509 goto out;
4510
4511 arch_spin_lock(&cpu_buffer->lock);
4512
4513 rb_reset_cpu(cpu_buffer);
4514
4515 arch_spin_unlock(&cpu_buffer->lock);
4516
4517 out:
4518 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4519
4520 atomic_dec(&cpu_buffer->record_disabled);
4521 atomic_dec(&cpu_buffer->resize_disabled);
4522}
4523EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
4524
/**
 * ring_buffer_reset - reset a ring buffer
 * @buffer: The ring buffer to reset all cpu buffers
 */
4529void ring_buffer_reset(struct trace_buffer *buffer)
4530{
4531 int cpu;
4532
4533 for_each_buffer_cpu(buffer, cpu)
4534 ring_buffer_reset_cpu(buffer, cpu);
4535}
4536EXPORT_SYMBOL_GPL(ring_buffer_reset);
4537
/**
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */
4542bool ring_buffer_empty(struct trace_buffer *buffer)
4543{
4544 struct ring_buffer_per_cpu *cpu_buffer;
4545 unsigned long flags;
4546 bool dolock;
4547 int cpu;
4548 int ret;
4549
4550
4551 for_each_buffer_cpu(buffer, cpu) {
4552 cpu_buffer = buffer->buffers[cpu];
4553 local_irq_save(flags);
4554 dolock = rb_reader_lock(cpu_buffer);
4555 ret = rb_per_cpu_empty(cpu_buffer);
4556 rb_reader_unlock(cpu_buffer, dolock);
4557 local_irq_restore(flags);
4558
4559 if (!ret)
4560 return false;
4561 }
4562
4563 return true;
4564}
4565EXPORT_SYMBOL_GPL(ring_buffer_empty);
4566
/**
 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */
4572bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
4573{
4574 struct ring_buffer_per_cpu *cpu_buffer;
4575 unsigned long flags;
4576 bool dolock;
4577 int ret;
4578
4579 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4580 return true;
4581
4582 cpu_buffer = buffer->buffers[cpu];
4583 local_irq_save(flags);
4584 dolock = rb_reader_lock(cpu_buffer);
4585 ret = rb_per_cpu_empty(cpu_buffer);
4586 rb_reader_unlock(cpu_buffer, dolock);
4587 local_irq_restore(flags);
4588
4589 return ret;
4590}
4591EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
4592
4593#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and has another back up buffer lying around.
 * It is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */
4605int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
4606 struct trace_buffer *buffer_b, int cpu)
4607{
4608 struct ring_buffer_per_cpu *cpu_buffer_a;
4609 struct ring_buffer_per_cpu *cpu_buffer_b;
4610 int ret = -EINVAL;
4611
4612 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4613 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4614 goto out;
4615
4616 cpu_buffer_a = buffer_a->buffers[cpu];
4617 cpu_buffer_b = buffer_b->buffers[cpu];
4618
4619
4620 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4621 goto out;
4622
4623 ret = -EAGAIN;
4624
4625 if (atomic_read(&buffer_a->record_disabled))
4626 goto out;
4627
4628 if (atomic_read(&buffer_b->record_disabled))
4629 goto out;
4630
4631 if (atomic_read(&cpu_buffer_a->record_disabled))
4632 goto out;
4633
4634 if (atomic_read(&cpu_buffer_b->record_disabled))
4635 goto out;
4636
	/*
	 * We can't do a synchronize_rcu here because this
	 * function can be called in atomic context.
	 * Normally this will be called from the same CPU as cpu.
	 * If not it's up to the caller to protect this.
	 */
4643 atomic_inc(&cpu_buffer_a->record_disabled);
4644 atomic_inc(&cpu_buffer_b->record_disabled);
4645
4646 ret = -EBUSY;
4647 if (local_read(&cpu_buffer_a->committing))
4648 goto out_dec;
4649 if (local_read(&cpu_buffer_b->committing))
4650 goto out_dec;
4651
4652 buffer_a->buffers[cpu] = cpu_buffer_b;
4653 buffer_b->buffers[cpu] = cpu_buffer_a;
4654
4655 cpu_buffer_b->buffer = buffer_a;
4656 cpu_buffer_a->buffer = buffer_b;
4657
4658 ret = 0;
4659
4660out_dec:
4661 atomic_dec(&cpu_buffer_a->record_disabled);
4662 atomic_dec(&cpu_buffer_b->record_disabled);
4663out:
4664 return ret;
4665}
4666EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
4667#endif
/**
 * ring_buffer_alloc_read_page - allocate a page to read from buffer
 * @buffer: the buffer to allocate for.
 * @cpu: the cpu buffer to allocate.
 *
 * This function is used in conjunction with ring_buffer_read_page.
 * When reading a full page from the ring buffer, these functions
 * can be used to speed up the process. The calling function should
 * allocate a few pages first with this function. Then when it
 * needs to get pages from the ring buffer, it passes the result
 * of this function into ring_buffer_read_page, which will swap
 * the page that was allocated, with the read page of the buffer.
 *
 * Returns:
 *  The page allocated, or ERR_PTR
 */
4685void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
4686{
4687 struct ring_buffer_per_cpu *cpu_buffer;
4688 struct buffer_data_page *bpage = NULL;
4689 unsigned long flags;
4690 struct page *page;
4691
4692 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4693 return ERR_PTR(-ENODEV);
4694
4695 cpu_buffer = buffer->buffers[cpu];
4696 local_irq_save(flags);
4697 arch_spin_lock(&cpu_buffer->lock);
4698
4699 if (cpu_buffer->free_page) {
4700 bpage = cpu_buffer->free_page;
4701 cpu_buffer->free_page = NULL;
4702 }
4703
4704 arch_spin_unlock(&cpu_buffer->lock);
4705 local_irq_restore(flags);
4706
4707 if (bpage)
4708 goto out;
4709
4710 page = alloc_pages_node(cpu_to_node(cpu),
4711 GFP_KERNEL | __GFP_NORETRY, 0);
4712 if (!page)
4713 return ERR_PTR(-ENOMEM);
4714
4715 bpage = page_address(page);
4716
4717 out:
4718 rb_init_page(bpage);
4719
4720 return bpage;
4721}
4722EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4723
/**
 * ring_buffer_free_read_page - free an allocated read page
 * @buffer: the buffer the page was allocated for
 * @cpu: the cpu buffer the page came from
 * @data: the page to free
 *
 * Free a page allocated from ring_buffer_alloc_read_page.
 */
4732void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data)
4733{
4734 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4735 struct buffer_data_page *bpage = data;
4736 struct page *page = virt_to_page(bpage);
4737 unsigned long flags;
4738
4739
4740 if (page_ref_count(page) > 1)
4741 goto out;
4742
4743 local_irq_save(flags);
4744 arch_spin_lock(&cpu_buffer->lock);
4745
4746 if (!cpu_buffer->free_page) {
4747 cpu_buffer->free_page = bpage;
4748 bpage = NULL;
4749 }
4750
4751 arch_spin_unlock(&cpu_buffer->lock);
4752 local_irq_restore(flags);
4753
4754 out:
4755 free_page((unsigned long)bpage);
4756}
4757EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4758
/**
 * ring_buffer_read_page - extract a page from the ring buffer
 * @buffer: buffer to extract from
 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 * @len: amount to extract
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full.
 *
 * This function will pull out a page from the ring buffer and consume it.
 * @data_page must be the address of the variable that was returned
 * from ring_buffer_alloc_read_page. This is because the page might be used
 * to swap with a page in the ring buffer.
 *
 * for example:
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (IS_ERR(rpage))
 *		return PTR_ERR(rpage);
 *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(rpage, ret);
 *
 * When @full is set, the function will not return true unless
 * the writer is off the reader page.
 *
 * Note: it is up to the calling functions to handle sleeps and wakeups.
 *  The ring buffer can be used anywhere in the kernel and can not
 *  rely on any sleeping calls.
 *
 * Return:
 *  >=0 if data has been transferred, returns the offset of consumed data.
 *  <0 if no data has been transferred.
 */
4792int ring_buffer_read_page(struct trace_buffer *buffer,
4793 void **data_page, size_t len, int cpu, int full)
4794{
4795 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4796 struct ring_buffer_event *event;
4797 struct buffer_data_page *bpage;
4798 struct buffer_page *reader;
4799 unsigned long missed_events;
4800 unsigned long flags;
4801 unsigned int commit;
4802 unsigned int read;
4803 u64 save_timestamp;
4804 int ret = -1;
4805
4806 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4807 goto out;
4808
4809
4810
4811
4812
4813 if (len <= BUF_PAGE_HDR_SIZE)
4814 goto out;
4815
4816 len -= BUF_PAGE_HDR_SIZE;
4817
4818 if (!data_page)
4819 goto out;
4820
4821 bpage = *data_page;
4822 if (!bpage)
4823 goto out;
4824
4825 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4826
4827 reader = rb_get_reader_page(cpu_buffer);
4828 if (!reader)
4829 goto out_unlock;
4830
4831 event = rb_reader_event(cpu_buffer);
4832
4833 read = reader->read;
4834 commit = rb_page_commit(reader);
4835
4836
4837 missed_events = cpu_buffer->lost_events;
4838
4839
4840
4841
4842
4843
4844
4845
4846 if (read || (len < (commit - read)) ||
4847 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4848 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4849 unsigned int rpos = read;
4850 unsigned int pos = 0;
4851 unsigned int size;
4852
4853 if (full)
4854 goto out_unlock;
4855
4856 if (len > (commit - read))
4857 len = (commit - read);
4858
4859
4860 size = rb_event_ts_length(event);
4861
4862 if (len < size)
4863 goto out_unlock;
4864
4865
4866 save_timestamp = cpu_buffer->read_stamp;
4867
4868
4869 do {
4870
4871
4872
4873
4874
4875
4876 size = rb_event_length(event);
4877 memcpy(bpage->data + pos, rpage->data + rpos, size);
4878
4879 len -= size;
4880
4881 rb_advance_reader(cpu_buffer);
4882 rpos = reader->read;
4883 pos += size;
4884
4885 if (rpos >= commit)
4886 break;
4887
4888 event = rb_reader_event(cpu_buffer);
4889
4890 size = rb_event_ts_length(event);
4891 } while (len >= size);
4892
4893
4894 local_set(&bpage->commit, pos);
4895 bpage->time_stamp = save_timestamp;
4896
4897
4898 read = 0;
4899 } else {
4900
4901 cpu_buffer->read += rb_page_entries(reader);
4902 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4903
4904
4905 rb_init_page(bpage);
4906 bpage = reader->page;
4907 reader->page = *data_page;
4908 local_set(&reader->write, 0);
4909 local_set(&reader->entries, 0);
4910 reader->read = 0;
4911 *data_page = bpage;
4912
4913
4914
4915
4916
4917
4918 if (reader->real_end)
4919 local_set(&bpage->commit, reader->real_end);
4920 }
4921 ret = read;
4922
4923 cpu_buffer->lost_events = 0;
4924
4925 commit = local_read(&bpage->commit);
4926
4927
4928
4929 if (missed_events) {
4930
4931
4932
4933 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4934 memcpy(&bpage->data[commit], &missed_events,
4935 sizeof(missed_events));
4936 local_add(RB_MISSED_STORED, &bpage->commit);
4937 commit += sizeof(missed_events);
4938 }
4939 local_add(RB_MISSED_EVENTS, &bpage->commit);
4940 }
4941
4942
4943
4944
4945 if (commit < BUF_PAGE_SIZE)
4946 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4947
4948 out_unlock:
4949 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4950
4951 out:
4952 return ret;
4953}
4954EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4955
/*
 * We only allocate new buffers, never free them if the CPU goes down.
 * If we were to free the buffer, then the user would lose any trace
 * that was in the buffer.
 */
4961int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
4962{
4963 struct trace_buffer *buffer;
4964 long nr_pages_same;
4965 int cpu_i;
4966 unsigned long nr_pages;
4967
4968 buffer = container_of(node, struct trace_buffer, node);
4969 if (cpumask_test_cpu(cpu, buffer->cpumask))
4970 return 0;
4971
4972 nr_pages = 0;
4973 nr_pages_same = 1;
4974
4975 for_each_buffer_cpu(buffer, cpu_i) {
4976
4977 if (nr_pages == 0)
4978 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4979 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4980 nr_pages_same = 0;
4981 break;
4982 }
4983 }
4984
4985 if (!nr_pages_same)
4986 nr_pages = 2;
4987 buffer->buffers[cpu] =
4988 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4989 if (!buffer->buffers[cpu]) {
4990 WARN(1, "failed to allocate ring buffer on CPU %u\n",
4991 cpu);
4992 return -ENOMEM;
4993 }
4994 smp_wmb();
4995 cpumask_set_cpu(cpu, buffer->cpumask);
4996 return 0;
4997}
4998
4999#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
/*
 * This is a basic integrity check of the ring buffer.
 * Late in the boot cycle this test will run when configured in.
 * It will kick off a thread per CPU that will go into a loop
 * writing to the per cpu ring buffer various sizes of data.
 * Some of the data will be large items, some small.
 *
 * Another thread is created that goes into a spin, sending out
 * IPIs to the other CPUs to also write into the ring buffer.
 * This is to test the nesting ability of the buffer.
 *
 * Basic stats are recorded and reported. If something in the
 * ring buffer should happen that's not expected, a big warning
 * is displayed and all ring buffers are disabled.
 */
5015static struct task_struct *rb_threads[NR_CPUS] __initdata;
5016
5017struct rb_test_data {
5018 struct trace_buffer *buffer;
5019 unsigned long events;
5020 unsigned long bytes_written;
5021 unsigned long bytes_alloc;
5022 unsigned long bytes_dropped;
5023 unsigned long events_nested;
5024 unsigned long bytes_written_nested;
5025 unsigned long bytes_alloc_nested;
5026 unsigned long bytes_dropped_nested;
5027 int min_size_nested;
5028 int max_size_nested;
5029 int max_size;
5030 int min_size;
5031 int cpu;
5032 int cnt;
5033};
5034
5035static struct rb_test_data rb_data[NR_CPUS] __initdata;
5036
/* 1 meg per cpu */
#define RB_TEST_BUFFER_SIZE 1048576
5039
5040static char rb_string[] __initdata =
5041 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
5042 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
5043 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
5044
5045static bool rb_test_started __initdata;
5046
5047struct rb_item {
5048 int size;
5049 char str[];
5050};
5051
5052static __init int rb_write_something(struct rb_test_data *data, bool nested)
5053{
5054 struct ring_buffer_event *event;
5055 struct rb_item *item;
5056 bool started;
5057 int event_len;
5058 int size;
5059 int len;
5060 int cnt;
5061
5062
5063 cnt = data->cnt + (nested ? 27 : 0);
5064
5065
5066 size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
5067
5068 len = size + sizeof(struct rb_item);
5069
5070 started = rb_test_started;
5071
5072 smp_rmb();
5073
5074 event = ring_buffer_lock_reserve(data->buffer, len);
5075 if (!event) {
		/* Ignore dropped events before the test has started */
		if (started) {
			if (nested)
				data->bytes_dropped_nested += len;
			else
				data->bytes_dropped += len;
5082 }
5083 return len;
5084 }
5085
5086 event_len = ring_buffer_event_length(event);
5087
5088 if (RB_WARN_ON(data->buffer, event_len < len))
5089 goto out;
5090
5091 item = ring_buffer_event_data(event);
5092 item->size = size;
5093 memcpy(item->str, rb_string, size);
5094
5095 if (nested) {
5096 data->bytes_alloc_nested += event_len;
5097 data->bytes_written_nested += len;
5098 data->events_nested++;
5099 if (!data->min_size_nested || len < data->min_size_nested)
5100 data->min_size_nested = len;
5101 if (len > data->max_size_nested)
5102 data->max_size_nested = len;
5103 } else {
5104 data->bytes_alloc += event_len;
5105 data->bytes_written += len;
5106 data->events++;
		if (!data->min_size || len < data->min_size)
			data->min_size = len;
5109 if (len > data->max_size)
5110 data->max_size = len;
5111 }
5112
5113 out:
5114 ring_buffer_unlock_commit(data->buffer, event);
5115
5116 return 0;
5117}
5118
5119static __init int rb_test(void *arg)
5120{
5121 struct rb_test_data *data = arg;
5122
5123 while (!kthread_should_stop()) {
5124 rb_write_something(data, false);
5125 data->cnt++;
5126
5127 set_current_state(TASK_INTERRUPTIBLE);
5128
5129 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
5130 }
5131
5132 return 0;
5133}
5134
5135static __init void rb_ipi(void *ignore)
5136{
5137 struct rb_test_data *data;
5138 int cpu = smp_processor_id();
5139
5140 data = &rb_data[cpu];
5141 rb_write_something(data, true);
5142}
5143
5144static __init int rb_hammer_test(void *arg)
5145{
5146 while (!kthread_should_stop()) {
5147
5148
5149 smp_call_function(rb_ipi, NULL, 1);
5150
5151 schedule();
5152 }
5153
5154 return 0;
5155}
5156
5157static __init int test_ringbuffer(void)
5158{
5159 struct task_struct *rb_hammer;
5160 struct trace_buffer *buffer;
5161 int cpu;
5162 int ret = 0;
5163
5164 if (security_locked_down(LOCKDOWN_TRACEFS)) {
5165 pr_warn("Lockdown is enabled, skipping ring buffer tests\n");
5166 return 0;
5167 }
5168
5169 pr_info("Running ring buffer tests...\n");
5170
5171 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
5172 if (WARN_ON(!buffer))
5173 return 0;
5174
5175
5176 ring_buffer_record_off(buffer);
5177
5178 for_each_online_cpu(cpu) {
5179 rb_data[cpu].buffer = buffer;
5180 rb_data[cpu].cpu = cpu;
5181 rb_data[cpu].cnt = cpu;
5182 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
5183 "rbtester/%d", cpu);
5184 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
5185 pr_cont("FAILED\n");
5186 ret = PTR_ERR(rb_threads[cpu]);
5187 goto out_free;
5188 }
5189
5190 kthread_bind(rb_threads[cpu], cpu);
5191 wake_up_process(rb_threads[cpu]);
5192 }
5193
5194
5195 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
5196 if (WARN_ON(IS_ERR(rb_hammer))) {
5197 pr_cont("FAILED\n");
5198 ret = PTR_ERR(rb_hammer);
5199 goto out_free;
5200 }
5201
5202 ring_buffer_record_on(buffer);
5203
	/*
	 * Show the buffer is enabled before setting rb_test_started.
	 * Yes, there's a small race window where events could be
	 * dropped and the thread won't catch it. But when a ring
	 * buffer gets it, it will be another page for the ring
	 * buffer test to catch it.
	 */
5212 smp_wmb();
5213 rb_test_started = true;
5214
5215 set_current_state(TASK_INTERRUPTIBLE);
	/* Just run for 10 seconds */
5217 schedule_timeout(10 * HZ);
5218
5219 kthread_stop(rb_hammer);
5220
5221 out_free:
5222 for_each_online_cpu(cpu) {
5223 if (!rb_threads[cpu])
5224 break;
5225 kthread_stop(rb_threads[cpu]);
5226 }
5227 if (ret) {
5228 ring_buffer_free(buffer);
5229 return ret;
5230 }
5231
5232
5233 pr_info("finished\n");
5234 for_each_online_cpu(cpu) {
5235 struct ring_buffer_event *event;
5236 struct rb_test_data *data = &rb_data[cpu];
5237 struct rb_item *item;
5238 unsigned long total_events;
5239 unsigned long total_dropped;
5240 unsigned long total_written;
5241 unsigned long total_alloc;
5242 unsigned long total_read = 0;
5243 unsigned long total_size = 0;
5244 unsigned long total_len = 0;
5245 unsigned long total_lost = 0;
5246 unsigned long lost;
5247 int big_event_size;
5248 int small_event_size;
5249
5250 ret = -1;
5251
5252 total_events = data->events + data->events_nested;
5253 total_written = data->bytes_written + data->bytes_written_nested;
5254 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
5255 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
5256
5257 big_event_size = data->max_size + data->max_size_nested;
5258 small_event_size = data->min_size + data->min_size_nested;
5259
5260 pr_info("CPU %d:\n", cpu);
5261 pr_info(" events: %ld\n", total_events);
5262 pr_info(" dropped bytes: %ld\n", total_dropped);
5263 pr_info(" alloced bytes: %ld\n", total_alloc);
5264 pr_info(" written bytes: %ld\n", total_written);
5265 pr_info(" biggest event: %d\n", big_event_size);
5266 pr_info(" smallest event: %d\n", small_event_size);
5267
5268 if (RB_WARN_ON(buffer, total_dropped))
5269 break;
5270
5271 ret = 0;
5272
5273 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
5274 total_lost += lost;
5275 item = ring_buffer_event_data(event);
5276 total_len += ring_buffer_event_length(event);
5277 total_size += item->size + sizeof(struct rb_item);
5278 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
5279 pr_info("FAILED!\n");
5280 pr_info("buffer had: %.*s\n", item->size, item->str);
5281 pr_info("expected: %.*s\n", item->size, rb_string);
5282 RB_WARN_ON(buffer, 1);
5283 ret = -1;
5284 break;
5285 }
5286 total_read++;
5287 }
5288 if (ret)
5289 break;
5290
5291 ret = -1;
5292
5293 pr_info(" read events: %ld\n", total_read);
5294 pr_info(" lost events: %ld\n", total_lost);
5295 pr_info(" total events: %ld\n", total_lost + total_read);
5296 pr_info(" recorded len bytes: %ld\n", total_len);
5297 pr_info(" recorded size bytes: %ld\n", total_size);
5298 if (total_lost)
5299 pr_info(" With dropped events, record len and size may not match\n"
5300 " alloced and written from above\n");
5301 if (!total_lost) {
5302 if (RB_WARN_ON(buffer, total_len != total_alloc ||
5303 total_size != total_written))
5304 break;
5305 }
5306 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
5307 break;
5308
5309 ret = 0;
5310 }
5311 if (!ret)
5312 pr_info("Ring buffer PASSED!\n");
5313
5314 ring_buffer_free(buffer);
5315 return 0;
5316}
5317
5318late_initcall(test_ringbuffer);
5319#endif
5320