/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/sched/clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);
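
/*
 * The ring buffer event header is special; print its compressed
 * layout for user space.
 */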
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len : 5 bits\n");
	trace_seq_puts(s, "\ttime_delta : 27 bits\n");
	trace_seq_puts(s, "\tarray : 32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\tdata max type_len == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}
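
/*
 * The ring buffer is made up of a list of pages held together by a linked
 * list. A dedicated "reader" page is swapped in for the page the reader is
 * currently consuming, so readers never contend with the writer for the
 * same page.
 *
 * The writer only writes to the page referenced by the tail pointer and
 * only advances the commit pointer once an event (including any events
 * nested by interrupts) has been fully written.
 *
 * The state of the head page (the next page the writer will overwrite) is
 * stored in the two least significant bits of the list pointer that points
 * to it (RB_PAGE_HEAD / RB_PAGE_UPDATE below), which lets the writer move
 * the head forward with cmpxchg without taking any locks.
 */
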
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
149
150static inline int rb_null_event(struct ring_buffer_event *event)
151{
152 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
153}
154
155static void rb_event_set_padding(struct ring_buffer_event *event)
156{
157
158 event->type_len = RINGBUF_TYPE_PADDING;
159 event->time_delta = 0;
160}
161
162static unsigned
163rb_event_data_length(struct ring_buffer_event *event)
164{
165 unsigned length;
166
167 if (event->type_len)
168 length = event->type_len * RB_ALIGNMENT;
169 else
170 length = event->array[0];
171 return length + RB_EVNT_HDR_SIZE;
172}
173
174
175
176
177
178
179static inline unsigned
180rb_event_length(struct ring_buffer_event *event)
181{
182 switch (event->type_len) {
183 case RINGBUF_TYPE_PADDING:
184 if (rb_null_event(event))
185
186 return -1;
187 return event->array[0] + RB_EVNT_HDR_SIZE;
188
189 case RINGBUF_TYPE_TIME_EXTEND:
190 return RB_LEN_TIME_EXTEND;
191
192 case RINGBUF_TYPE_TIME_STAMP:
193 return RB_LEN_TIME_STAMP;
194
195 case RINGBUF_TYPE_DATA:
196 return rb_event_data_length(event);
197 default:
198 BUG();
199 }
200
201 return 0;
202}
203
204
205
206
207
208static inline unsigned
209rb_event_ts_length(struct ring_buffer_event *event)
210{
211 unsigned len = 0;
212
213 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
214
215 len = RB_LEN_TIME_EXTEND;
216 event = skip_time_extend(event);
217 }
218 return len + rb_event_length(event);
219}
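
/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */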
231unsigned ring_buffer_event_length(struct ring_buffer_event *event)
232{
233 unsigned length;
234
235 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
236 event = skip_time_extend(event);
237
238 length = rb_event_length(event);
239 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
240 return length;
241 length -= RB_EVNT_HDR_SIZE;
242 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
243 length -= sizeof(event->array[0]);
244 return length;
245}
246EXPORT_SYMBOL_GPL(ring_buffer_event_length);
247
248
249static __always_inline void *
250rb_event_data(struct ring_buffer_event *event)
251{
252 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
253 event = skip_time_extend(event);
254 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
255
256 if (event->type_len)
257 return (void *)&event->array[0];
258
259 return (void *)&event->array[1];
260}
261
262
263
264
265
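/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */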
266void *ring_buffer_event_data(struct ring_buffer_event *event)
267{
268 return rb_event_data(event);
269}
270EXPORT_SYMBOL_GPL(ring_buffer_event_data);
271
272#define for_each_buffer_cpu(buffer, cpu) \
273 for_each_cpu(cpu, buffer->cpumask)
274
275#define TS_SHIFT 27
276#define TS_MASK ((1ULL << TS_SHIFT) - 1)
277#define TS_DELTA_TEST (~TS_MASK)
278
279
280#define RB_MISSED_EVENTS (1 << 31)
281
282#define RB_MISSED_STORED (1 << 30)
283
284struct buffer_data_page {
285 u64 time_stamp;
286 local_t commit;
287 unsigned char data[] RB_ALIGN_DATA;
288};
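
/*
 * Note, the buffer_page list member must stay first.  Buffer pages are
 * allocated with cache line alignment, which keeps the low bits of a
 * buffer_page pointer clear so that the RB_PAGE_* flags can be stored
 * in the list pointers that reference it.
 */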
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};
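
/*
 * The buffer page counters, write and entries, must be reset atomically
 * when crossing page boundaries.  Two counters are packed into each value:
 * the low 20 bits (RB_WRITE_MASK) hold the actual write position or entry
 * count, and the bits above it (RB_WRITE_INTCNT) form an update counter
 * that lets rb_tail_page_update() invalidate interrupted updates with a
 * single cmpxchg.
 */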
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)
321
322static void rb_init_page(struct buffer_data_page *bpage)
323{
324 local_set(&bpage->commit, 0);
325}
326
327
328
329
330
331
332
333size_t ring_buffer_page_len(void *page)
334{
335 return local_read(&((struct buffer_data_page *)page)->commit)
336 + BUF_PAGE_HDR_SIZE;
337}
338
339
340
341
342
343static void free_buffer_page(struct buffer_page *bpage)
344{
345 free_page((unsigned long)bpage->page);
346 kfree(bpage);
347}
348
349
350
351
352static inline int test_time_stamp(u64 delta)
353{
354 if (delta & TS_DELTA_TEST)
355 return 1;
356 return 0;
357}
358
359#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
360
361
362#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
363
364int ring_buffer_print_page_header(struct trace_seq *s)
365{
366 struct buffer_data_page field;
367
368 trace_seq_printf(s, "\tfield: u64 timestamp;\t"
369 "offset:0;\tsize:%u;\tsigned:%u;\n",
370 (unsigned int)sizeof(field.time_stamp),
371 (unsigned int)is_signed_type(u64));
372
373 trace_seq_printf(s, "\tfield: local_t commit;\t"
374 "offset:%u;\tsize:%u;\tsigned:%u;\n",
375 (unsigned int)offsetof(typeof(field), commit),
376 (unsigned int)sizeof(field.commit),
377 (unsigned int)is_signed_type(long));
378
379 trace_seq_printf(s, "\tfield: int overwrite;\t"
380 "offset:%u;\tsize:%u;\tsigned:%u;\n",
381 (unsigned int)offsetof(typeof(field), commit),
382 1,
383 (unsigned int)is_signed_type(long));
384
385 trace_seq_printf(s, "\tfield: char data;\t"
386 "offset:%u;\tsize:%u;\tsigned:%u;\n",
387 (unsigned int)offsetof(typeof(field), data),
388 (unsigned int)BUF_PAGE_SIZE,
389 (unsigned int)is_signed_type(char));
390
391 return !trace_seq_has_overflowed(s);
392}
393
394struct rb_irq_work {
395 struct irq_work work;
396 wait_queue_head_t waiters;
397 wait_queue_head_t full_waiters;
398 bool waiters_pending;
399 bool full_waiters_pending;
400 bool wakeup_full;
401};
402
403
404
405
406struct rb_event_info {
407 u64 ts;
408 u64 delta;
409 unsigned long length;
410 struct buffer_page *tail_page;
411 int add_timestamp;
412};
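
/*
 * Used for which event context the event is in.
 *  NMI     = 0
 *  IRQ     = 1
 *  SOFTIRQ = 2
 *  NORMAL  = 3
 *
 * See trace_recursive_lock() below for how these are used to detect
 * recursion within the same context.
 */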
enum {
	RB_CTX_NMI,
	RB_CTX_IRQ,
	RB_CTX_SOFTIRQ,
	RB_CTX_NORMAL,
	RB_CTX_MAX
};
430
431
432
433
434struct ring_buffer_per_cpu {
435 int cpu;
436 atomic_t record_disabled;
437 struct ring_buffer *buffer;
438 raw_spinlock_t reader_lock;
439 arch_spinlock_t lock;
440 struct lock_class_key lock_key;
441 struct buffer_data_page *free_page;
442 unsigned long nr_pages;
443 unsigned int current_context;
444 struct list_head *pages;
445 struct buffer_page *head_page;
446 struct buffer_page *tail_page;
447 struct buffer_page *commit_page;
448 struct buffer_page *reader_page;
449 unsigned long lost_events;
450 unsigned long last_overrun;
451 local_t entries_bytes;
452 local_t entries;
453 local_t overrun;
454 local_t commit_overrun;
455 local_t dropped_events;
456 local_t committing;
457 local_t commits;
458 unsigned long read;
459 unsigned long read_bytes;
460 u64 write_stamp;
461 u64 read_stamp;
462
463 long nr_pages_to_update;
464 struct list_head new_pages;
465 struct work_struct update_pages_work;
466 struct completion update_done;
467
468 struct rb_irq_work irq_work;
469};
470
471struct ring_buffer {
472 unsigned flags;
473 int cpus;
474 atomic_t record_disabled;
475 atomic_t resize_disabled;
476 cpumask_var_t cpumask;
477
478 struct lock_class_key *reader_lock_key;
479
480 struct mutex mutex;
481
482 struct ring_buffer_per_cpu **buffers;
483
484 struct hlist_node node;
485 u64 (*clock)(void);
486
487 struct rb_irq_work irq_work;
488};
489
490struct ring_buffer_iter {
491 struct ring_buffer_per_cpu *cpu_buffer;
492 unsigned long head;
493 struct buffer_page *head_page;
494 struct buffer_page *cache_reader_page;
495 unsigned long cache_read;
496 u64 read_stamp;
497};
498
499
500
501
502
503
504
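/*
 * rb_wake_up_waiters - irq_work callback that wakes up tasks blocked
 * waiting for data in the ring buffer.
 */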
505static void rb_wake_up_waiters(struct irq_work *work)
506{
507 struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
508
509 wake_up_all(&rbwork->waiters);
510 if (rbwork->wakeup_full) {
511 rbwork->wakeup_full = false;
512 wake_up_all(&rbwork->full_waiters);
513 }
514}
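
/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until a full page is available, only valid when
 *        @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */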
526int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
527{
528 struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
529 DEFINE_WAIT(wait);
530 struct rb_irq_work *work;
531 int ret = 0;
532
533
534
535
536
537
538 if (cpu == RING_BUFFER_ALL_CPUS) {
539 work = &buffer->irq_work;
540
541 full = false;
542 } else {
543 if (!cpumask_test_cpu(cpu, buffer->cpumask))
544 return -ENODEV;
545 cpu_buffer = buffer->buffers[cpu];
546 work = &cpu_buffer->irq_work;
547 }
548
549
550 while (true) {
551 if (full)
552 prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
553 else
554 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576 if (full)
577 work->full_waiters_pending = true;
578 else
579 work->waiters_pending = true;
580
581 if (signal_pending(current)) {
582 ret = -EINTR;
583 break;
584 }
585
586 if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
587 break;
588
589 if (cpu != RING_BUFFER_ALL_CPUS &&
590 !ring_buffer_empty_cpu(buffer, cpu)) {
591 unsigned long flags;
592 bool pagebusy;
593
594 if (!full)
595 break;
596
597 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
598 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
599 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
600
601 if (!pagebusy)
602 break;
603 }
604
605 schedule();
606 }
607
608 if (full)
609 finish_wait(&work->full_waiters, &wait);
610 else
611 finish_wait(&work->waiters, &wait);
612
613 return ret;
614}
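
/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns POLLIN | POLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */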
630int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
631 struct file *filp, poll_table *poll_table)
632{
633 struct ring_buffer_per_cpu *cpu_buffer;
634 struct rb_irq_work *work;
635
636 if (cpu == RING_BUFFER_ALL_CPUS)
637 work = &buffer->irq_work;
638 else {
639 if (!cpumask_test_cpu(cpu, buffer->cpumask))
640 return -EINVAL;
641
642 cpu_buffer = buffer->buffers[cpu];
643 work = &cpu_buffer->irq_work;
644 }
645
646 poll_wait(filp, &work->waiters, poll_table);
647 work->waiters_pending = true;
648
649
650
651
652
653
654
655
656
657
658
659
660
661 smp_mb();
662
663 if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
664 (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
665 return POLLIN | POLLRDNORM;
666 return 0;
667}
668
669
670#define RB_WARN_ON(b, cond) \
671 ({ \
672 int _____ret = unlikely(cond); \
673 if (_____ret) { \
674 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
675 struct ring_buffer_per_cpu *__b = \
676 (void *)b; \
677 atomic_inc(&__b->buffer->record_disabled); \
678 } else \
679 atomic_inc(&b->record_disabled); \
680 WARN_ON(1); \
681 } \
682 _____ret; \
683 })
684
685
686#define DEBUG_SHIFT 0
687
688static inline u64 rb_time_stamp(struct ring_buffer *buffer)
689{
690
691 return buffer->clock() << DEBUG_SHIFT;
692}
693
694u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
695{
696 u64 time;
697
698 preempt_disable_notrace();
699 time = rb_time_stamp(buffer);
700 preempt_enable_no_resched_notrace();
701
702 return time;
703}
704EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
705
706void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
707 int cpu, u64 *ts)
708{
709
710 *ts >>= DEBUG_SHIFT;
711}
712EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
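
/*
 * Making the ring buffer lockless makes things tricky.  The writer can
 * overwrite pages the reader is walking, so the state of the "head" page
 * (the next page the reader will swap in) is kept in the two low bits of
 * the list pointer that points to it:
 *
 *  RB_PAGE_NORMAL - an ordinary page
 *  RB_PAGE_HEAD   - the pointed-to page is the head page
 *  RB_PAGE_UPDATE - a writer is in the process of moving the head
 *
 * A writer that needs to overwrite the head page first flips HEAD to
 * UPDATE with cmpxchg, moves the head forward, then clears the flag.
 * RB_PAGE_MOVED is returned by the helpers below when the page the
 * caller was looking at is no longer where it expected it to be.
 */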
#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL

/* Mask for the flag bits stored in the low bits of the list pointers */
#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask; it is only a return value */
#define RB_PAGE_MOVED		4UL
792
793
794
795
796static struct list_head *rb_list_head(struct list_head *list)
797{
798 unsigned long val = (unsigned long)list;
799
800 return (struct list_head *)(val & ~RB_FLAG_MASK);
801}
802
803
804
805
806
807
808
809
810
811static inline int
812rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
813 struct buffer_page *page, struct list_head *list)
814{
815 unsigned long val;
816
817 val = (unsigned long)list->next;
818
819 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
820 return RB_PAGE_MOVED;
821
822 return val & RB_FLAG_MASK;
823}
824
825
826
827
828
829
830
831
832static bool rb_is_reader_page(struct buffer_page *page)
833{
834 struct list_head *list = page->list.prev;
835
836 return rb_list_head(list->next) != &page->list;
837}
838
839
840
841
842static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
843 struct list_head *list)
844{
845 unsigned long *ptr;
846
847 ptr = (unsigned long *)&list->next;
848 *ptr |= RB_PAGE_HEAD;
849 *ptr &= ~RB_PAGE_UPDATE;
850}
851
852
853
854
855static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
856{
857 struct buffer_page *head;
858
859 head = cpu_buffer->head_page;
860 if (!head)
861 return;
862
863
864
865
866 rb_set_list_to_head(cpu_buffer, head->list.prev);
867}
868
869static void rb_list_head_clear(struct list_head *list)
870{
871 unsigned long *ptr = (unsigned long *)&list->next;
872
873 *ptr &= ~RB_FLAG_MASK;
874}
875
876
877
878
879static void
880rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
881{
882 struct list_head *hd;
883
884
885 rb_list_head_clear(cpu_buffer->pages);
886
887 list_for_each(hd, cpu_buffer->pages)
888 rb_list_head_clear(hd);
889}
890
891static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
892 struct buffer_page *head,
893 struct buffer_page *prev,
894 int old_flag, int new_flag)
895{
896 struct list_head *list;
897 unsigned long val = (unsigned long)&head->list;
898 unsigned long ret;
899
900 list = &prev->list;
901
902 val &= ~RB_FLAG_MASK;
903
904 ret = cmpxchg((unsigned long *)&list->next,
905 val | old_flag, val | new_flag);
906
907
908 if ((ret & ~RB_FLAG_MASK) != val)
909 return RB_PAGE_MOVED;
910
911 return ret & RB_FLAG_MASK;
912}
913
914static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
915 struct buffer_page *head,
916 struct buffer_page *prev,
917 int old_flag)
918{
919 return rb_head_page_set(cpu_buffer, head, prev,
920 old_flag, RB_PAGE_UPDATE);
921}
922
923static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
924 struct buffer_page *head,
925 struct buffer_page *prev,
926 int old_flag)
927{
928 return rb_head_page_set(cpu_buffer, head, prev,
929 old_flag, RB_PAGE_HEAD);
930}
931
932static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
933 struct buffer_page *head,
934 struct buffer_page *prev,
935 int old_flag)
936{
937 return rb_head_page_set(cpu_buffer, head, prev,
938 old_flag, RB_PAGE_NORMAL);
939}
940
941static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
942 struct buffer_page **bpage)
943{
944 struct list_head *p = rb_list_head((*bpage)->list.next);
945
946 *bpage = list_entry(p, struct buffer_page, list);
947}
948
949static struct buffer_page *
950rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
951{
952 struct buffer_page *head;
953 struct buffer_page *page;
954 struct list_head *list;
955 int i;
956
957 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
958 return NULL;
959
960
961 list = cpu_buffer->pages;
962 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
963 return NULL;
964
965 page = head = cpu_buffer->head_page;
966
967
968
969
970
971
972 for (i = 0; i < 3; i++) {
973 do {
974 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
975 cpu_buffer->head_page = page;
976 return page;
977 }
978 rb_inc_page(cpu_buffer, &page);
979 } while (page != head);
980 }
981
982 RB_WARN_ON(cpu_buffer, 1);
983
984 return NULL;
985}
986
987static int rb_head_page_replace(struct buffer_page *old,
988 struct buffer_page *new)
989{
990 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
991 unsigned long val;
992 unsigned long ret;
993
994 val = *ptr & ~RB_FLAG_MASK;
995 val |= RB_PAGE_HEAD;
996
997 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
998
999 return ret == val;
1000}
1001
1002
1003
1004
1005static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
1006 struct buffer_page *tail_page,
1007 struct buffer_page *next_page)
1008{
1009 unsigned long old_entries;
1010 unsigned long old_write;
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
1022 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
1023
1024
1025
1026
1027
1028 barrier();
1029
1030
1031
1032
1033
1034
1035 if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
1036
1037 unsigned long val = old_write & ~RB_WRITE_MASK;
1038 unsigned long eval = old_entries & ~RB_WRITE_MASK;
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050 (void)local_cmpxchg(&next_page->write, old_write, val);
1051 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
1052
1053
1054
1055
1056
1057
1058 local_set(&next_page->page->commit, 0);
1059
1060
1061 (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
1062 }
1063}
1064
1065static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
1066 struct buffer_page *bpage)
1067{
1068 unsigned long val = (unsigned long)bpage;
1069
1070 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
1071 return 1;
1072
1073 return 0;
1074}
1075
1076
1077
1078
1079static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1080 struct list_head *list)
1081{
1082 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
1083 return 1;
1084 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
1085 return 1;
1086 return 0;
1087}
1088
1089
1090
1091
1092
1093
1094
1095
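/*
 * rb_check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted.
 */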
1096static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
1097{
1098 struct list_head *head = cpu_buffer->pages;
1099 struct buffer_page *bpage, *tmp;
1100
1101
1102 if (cpu_buffer->head_page)
1103 rb_set_head_page(cpu_buffer);
1104
1105 rb_head_page_deactivate(cpu_buffer);
1106
1107 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
1108 return -1;
1109 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
1110 return -1;
1111
1112 if (rb_check_list(cpu_buffer, head))
1113 return -1;
1114
1115 list_for_each_entry_safe(bpage, tmp, head, list) {
1116 if (RB_WARN_ON(cpu_buffer,
1117 bpage->list.next->prev != &bpage->list))
1118 return -1;
1119 if (RB_WARN_ON(cpu_buffer,
1120 bpage->list.prev->next != &bpage->list))
1121 return -1;
1122 if (rb_check_list(cpu_buffer, &bpage->list))
1123 return -1;
1124 }
1125
1126 rb_head_page_activate(cpu_buffer);
1127
1128 return 0;
1129}
1130
1131static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
1132{
1133 struct buffer_page *bpage, *tmp;
1134 long i;
1135
1136 for (i = 0; i < nr_pages; i++) {
1137 struct page *page;
1138
1139
1140
1141
1142
1143 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1144 GFP_KERNEL | __GFP_RETRY_MAYFAIL,
1145 cpu_to_node(cpu));
1146 if (!bpage)
1147 goto free_pages;
1148
1149 list_add(&bpage->list, pages);
1150
1151 page = alloc_pages_node(cpu_to_node(cpu),
1152 GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
1153 if (!page)
1154 goto free_pages;
1155 bpage->page = page_address(page);
1156 rb_init_page(bpage->page);
1157 }
1158
1159 return 0;
1160
1161free_pages:
1162 list_for_each_entry_safe(bpage, tmp, pages, list) {
1163 list_del_init(&bpage->list);
1164 free_buffer_page(bpage);
1165 }
1166
1167 return -ENOMEM;
1168}
1169
1170static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1171 unsigned long nr_pages)
1172{
1173 LIST_HEAD(pages);
1174
1175 WARN_ON(!nr_pages);
1176
1177 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1178 return -ENOMEM;
1179
1180
1181
1182
1183
1184
1185 cpu_buffer->pages = pages.next;
1186 list_del(&pages);
1187
1188 cpu_buffer->nr_pages = nr_pages;
1189
1190 rb_check_pages(cpu_buffer);
1191
1192 return 0;
1193}
1194
1195static struct ring_buffer_per_cpu *
1196rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu)
1197{
1198 struct ring_buffer_per_cpu *cpu_buffer;
1199 struct buffer_page *bpage;
1200 struct page *page;
1201 int ret;
1202
1203 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1204 GFP_KERNEL, cpu_to_node(cpu));
1205 if (!cpu_buffer)
1206 return NULL;
1207
1208 cpu_buffer->cpu = cpu;
1209 cpu_buffer->buffer = buffer;
1210 raw_spin_lock_init(&cpu_buffer->reader_lock);
1211 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1212 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1213 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1214 init_completion(&cpu_buffer->update_done);
1215 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1216 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1217 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1218
1219 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1220 GFP_KERNEL, cpu_to_node(cpu));
1221 if (!bpage)
1222 goto fail_free_buffer;
1223
1224 rb_check_bpage(cpu_buffer, bpage);
1225
1226 cpu_buffer->reader_page = bpage;
1227 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1228 if (!page)
1229 goto fail_free_reader;
1230 bpage->page = page_address(page);
1231 rb_init_page(bpage->page);
1232
1233 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1234 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1235
1236 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1237 if (ret < 0)
1238 goto fail_free_reader;
1239
1240 cpu_buffer->head_page
1241 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1242 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1243
1244 rb_head_page_activate(cpu_buffer);
1245
1246 return cpu_buffer;
1247
1248 fail_free_reader:
1249 free_buffer_page(cpu_buffer->reader_page);
1250
1251 fail_free_buffer:
1252 kfree(cpu_buffer);
1253 return NULL;
1254}
1255
1256static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1257{
1258 struct list_head *head = cpu_buffer->pages;
1259 struct buffer_page *bpage, *tmp;
1260
1261 free_buffer_page(cpu_buffer->reader_page);
1262
1263 rb_head_page_deactivate(cpu_buffer);
1264
1265 if (head) {
1266 list_for_each_entry_safe(bpage, tmp, head, list) {
1267 list_del_init(&bpage->list);
1268 free_buffer_page(bpage);
1269 }
1270 bpage = list_entry(head, struct buffer_page, list);
1271 free_buffer_page(bpage);
1272 }
1273
1274 kfree(cpu_buffer);
1275}
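
/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 * @key: lockdep class key for the reader lock
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */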
1287struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1288 struct lock_class_key *key)
1289{
1290 struct ring_buffer *buffer;
1291 long nr_pages;
1292 int bsize;
1293 int cpu;
1294 int ret;
1295
1296
1297 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1298 GFP_KERNEL);
1299 if (!buffer)
1300 return NULL;
1301
1302 if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1303 goto fail_free_buffer;
1304
1305 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1306 buffer->flags = flags;
1307 buffer->clock = trace_clock_local;
1308 buffer->reader_lock_key = key;
1309
1310 init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1311 init_waitqueue_head(&buffer->irq_work.waiters);
1312
1313
1314 if (nr_pages < 2)
1315 nr_pages = 2;
1316
1317 buffer->cpus = nr_cpu_ids;
1318
1319 bsize = sizeof(void *) * nr_cpu_ids;
1320 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1321 GFP_KERNEL);
1322 if (!buffer->buffers)
1323 goto fail_free_cpumask;
1324
1325 cpu = raw_smp_processor_id();
1326 cpumask_set_cpu(cpu, buffer->cpumask);
1327 buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1328 if (!buffer->buffers[cpu])
1329 goto fail_free_buffers;
1330
1331 ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1332 if (ret < 0)
1333 goto fail_free_buffers;
1334
1335 mutex_init(&buffer->mutex);
1336
1337 return buffer;
1338
1339 fail_free_buffers:
1340 for_each_buffer_cpu(buffer, cpu) {
1341 if (buffer->buffers[cpu])
1342 rb_free_cpu_buffer(buffer->buffers[cpu]);
1343 }
1344 kfree(buffer->buffers);
1345
1346 fail_free_cpumask:
1347 free_cpumask_var(buffer->cpumask);
1348
1349 fail_free_buffer:
1350 kfree(buffer);
1351 return NULL;
1352}
1353EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
1354
1355
1356
1357
1358
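/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */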
1359void
1360ring_buffer_free(struct ring_buffer *buffer)
1361{
1362 int cpu;
1363
1364 cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1365
1366 for_each_buffer_cpu(buffer, cpu)
1367 rb_free_cpu_buffer(buffer->buffers[cpu]);
1368
1369 kfree(buffer->buffers);
1370 free_cpumask_var(buffer->cpumask);
1371
1372 kfree(buffer);
1373}
1374EXPORT_SYMBOL_GPL(ring_buffer_free);
1375
1376void ring_buffer_set_clock(struct ring_buffer *buffer,
1377 u64 (*clock)(void))
1378{
1379 buffer->clock = clock;
1380}
1381
1382static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1383
1384static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1385{
1386 return local_read(&bpage->entries) & RB_WRITE_MASK;
1387}
1388
1389static inline unsigned long rb_page_write(struct buffer_page *bpage)
1390{
1391 return local_read(&bpage->write) & RB_WRITE_MASK;
1392}
1393
1394static int
1395rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
1396{
1397 struct list_head *tail_page, *to_remove, *next_page;
1398 struct buffer_page *to_remove_page, *tmp_iter_page;
1399 struct buffer_page *last_page, *first_page;
1400 unsigned long nr_removed;
1401 unsigned long head_bit;
1402 int page_entries;
1403
1404 head_bit = 0;
1405
1406 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1407 atomic_inc(&cpu_buffer->record_disabled);
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417 tail_page = &cpu_buffer->tail_page->list;
1418
1419
1420
1421
1422
1423 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1424 tail_page = rb_list_head(tail_page->next);
1425 to_remove = tail_page;
1426
1427
1428 first_page = list_entry(rb_list_head(to_remove->next),
1429 struct buffer_page, list);
1430
1431 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1432 to_remove = rb_list_head(to_remove)->next;
1433 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1434 }
1435
1436 next_page = rb_list_head(to_remove)->next;
1437
1438
1439
1440
1441
1442
1443 tail_page->next = (struct list_head *)((unsigned long)next_page |
1444 head_bit);
1445 next_page = rb_list_head(next_page);
1446 next_page->prev = tail_page;
1447
1448
1449 cpu_buffer->pages = next_page;
1450
1451
1452 if (head_bit)
1453 cpu_buffer->head_page = list_entry(next_page,
1454 struct buffer_page, list);
1455
1456
1457
1458
1459
1460 cpu_buffer->read = 0;
1461
1462
1463 atomic_dec(&cpu_buffer->record_disabled);
1464 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1465
1466 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1467
1468
1469 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1470 list);
1471 tmp_iter_page = first_page;
1472
1473 do {
1474 to_remove_page = tmp_iter_page;
1475 rb_inc_page(cpu_buffer, &tmp_iter_page);
1476
1477
1478 page_entries = rb_page_entries(to_remove_page);
1479 if (page_entries) {
1480
1481
1482
1483
1484
1485
1486 local_add(page_entries, &cpu_buffer->overrun);
1487 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1488 }
1489
1490
1491
1492
1493
1494 free_buffer_page(to_remove_page);
1495 nr_removed--;
1496
1497 } while (to_remove_page != last_page);
1498
1499 RB_WARN_ON(cpu_buffer, nr_removed);
1500
1501 return nr_removed == 0;
1502}
1503
1504static int
1505rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1506{
1507 struct list_head *pages = &cpu_buffer->new_pages;
1508 int retries, success;
1509
1510 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525 retries = 10;
1526 success = 0;
1527 while (retries--) {
1528 struct list_head *head_page, *prev_page, *r;
1529 struct list_head *last_page, *first_page;
1530 struct list_head *head_page_with_bit;
1531
1532 head_page = &rb_set_head_page(cpu_buffer)->list;
1533 if (!head_page)
1534 break;
1535 prev_page = head_page->prev;
1536
1537 first_page = pages->next;
1538 last_page = pages->prev;
1539
1540 head_page_with_bit = (struct list_head *)
1541 ((unsigned long)head_page | RB_PAGE_HEAD);
1542
1543 last_page->next = head_page_with_bit;
1544 first_page->prev = prev_page;
1545
1546 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1547
1548 if (r == head_page_with_bit) {
1549
1550
1551
1552
1553
1554 head_page->prev = last_page;
1555 success = 1;
1556 break;
1557 }
1558 }
1559
1560 if (success)
1561 INIT_LIST_HEAD(pages);
1562
1563
1564
1565
1566 RB_WARN_ON(cpu_buffer, !success);
1567 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1568
1569
1570 if (!success) {
1571 struct buffer_page *bpage, *tmp;
1572 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1573 list) {
1574 list_del_init(&bpage->list);
1575 free_buffer_page(bpage);
1576 }
1577 }
1578 return success;
1579}
1580
1581static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1582{
1583 int success;
1584
1585 if (cpu_buffer->nr_pages_to_update > 0)
1586 success = rb_insert_pages(cpu_buffer);
1587 else
1588 success = rb_remove_pages(cpu_buffer,
1589 -cpu_buffer->nr_pages_to_update);
1590
1591 if (success)
1592 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1593}
1594
1595static void update_pages_handler(struct work_struct *work)
1596{
1597 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1598 struct ring_buffer_per_cpu, update_pages_work);
1599 rb_update_pages(cpu_buffer);
1600 complete(&cpu_buffer->update_done);
1601}
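
/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size in bytes.
 * @cpu_id: the cpu buffer to resize, or RING_BUFFER_ALL_CPUS for all
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns size on success and < 0 on failure.
 */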
1613int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1614 int cpu_id)
1615{
1616 struct ring_buffer_per_cpu *cpu_buffer;
1617 unsigned long nr_pages;
1618 int cpu, err = 0;
1619
1620
1621
1622
1623 if (!buffer)
1624 return size;
1625
1626
1627 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1628 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1629 return size;
1630
1631 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1632
1633
1634 if (nr_pages < 2)
1635 nr_pages = 2;
1636
1637 size = nr_pages * BUF_PAGE_SIZE;
1638
1639
1640
1641
1642
1643
1644 if (atomic_read(&buffer->resize_disabled))
1645 return -EBUSY;
1646
1647
1648 mutex_lock(&buffer->mutex);
1649
1650 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1651
1652 for_each_buffer_cpu(buffer, cpu) {
1653 cpu_buffer = buffer->buffers[cpu];
1654
1655 cpu_buffer->nr_pages_to_update = nr_pages -
1656 cpu_buffer->nr_pages;
1657
1658
1659
1660 if (cpu_buffer->nr_pages_to_update <= 0)
1661 continue;
1662
1663
1664
1665
1666 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1667 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1668 &cpu_buffer->new_pages, cpu)) {
1669
1670 err = -ENOMEM;
1671 goto out_err;
1672 }
1673 }
1674
1675 get_online_cpus();
1676
1677
1678
1679
1680
1681 for_each_buffer_cpu(buffer, cpu) {
1682 cpu_buffer = buffer->buffers[cpu];
1683 if (!cpu_buffer->nr_pages_to_update)
1684 continue;
1685
1686
1687 if (!cpu_online(cpu)) {
1688 rb_update_pages(cpu_buffer);
1689 cpu_buffer->nr_pages_to_update = 0;
1690 } else {
1691 schedule_work_on(cpu,
1692 &cpu_buffer->update_pages_work);
1693 }
1694 }
1695
1696
1697 for_each_buffer_cpu(buffer, cpu) {
1698 cpu_buffer = buffer->buffers[cpu];
1699 if (!cpu_buffer->nr_pages_to_update)
1700 continue;
1701
1702 if (cpu_online(cpu))
1703 wait_for_completion(&cpu_buffer->update_done);
1704 cpu_buffer->nr_pages_to_update = 0;
1705 }
1706
1707 put_online_cpus();
1708 } else {
1709
1710 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1711 goto out;
1712
1713 cpu_buffer = buffer->buffers[cpu_id];
1714
1715 if (nr_pages == cpu_buffer->nr_pages)
1716 goto out;
1717
1718 cpu_buffer->nr_pages_to_update = nr_pages -
1719 cpu_buffer->nr_pages;
1720
1721 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1722 if (cpu_buffer->nr_pages_to_update > 0 &&
1723 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1724 &cpu_buffer->new_pages, cpu_id)) {
1725 err = -ENOMEM;
1726 goto out_err;
1727 }
1728
1729 get_online_cpus();
1730
1731
1732 if (!cpu_online(cpu_id))
1733 rb_update_pages(cpu_buffer);
1734 else {
1735 schedule_work_on(cpu_id,
1736 &cpu_buffer->update_pages_work);
1737 wait_for_completion(&cpu_buffer->update_done);
1738 }
1739
1740 cpu_buffer->nr_pages_to_update = 0;
1741 put_online_cpus();
1742 }
1743
1744 out:
1745
1746
1747
1748
1749
1750
1751
1752 if (atomic_read(&buffer->record_disabled)) {
1753 atomic_inc(&buffer->record_disabled);
1754
1755
1756
1757
1758
1759
1760 synchronize_sched();
1761 for_each_buffer_cpu(buffer, cpu) {
1762 cpu_buffer = buffer->buffers[cpu];
1763 rb_check_pages(cpu_buffer);
1764 }
1765 atomic_dec(&buffer->record_disabled);
1766 }
1767
1768 mutex_unlock(&buffer->mutex);
1769 return size;
1770
1771 out_err:
1772 for_each_buffer_cpu(buffer, cpu) {
1773 struct buffer_page *bpage, *tmp;
1774
1775 cpu_buffer = buffer->buffers[cpu];
1776 cpu_buffer->nr_pages_to_update = 0;
1777
1778 if (list_empty(&cpu_buffer->new_pages))
1779 continue;
1780
1781 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1782 list) {
1783 list_del_init(&bpage->list);
1784 free_buffer_page(bpage);
1785 }
1786 }
1787 mutex_unlock(&buffer->mutex);
1788 return err;
1789}
1790EXPORT_SYMBOL_GPL(ring_buffer_resize);
1791
1792void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1793{
1794 mutex_lock(&buffer->mutex);
1795 if (val)
1796 buffer->flags |= RB_FL_OVERWRITE;
1797 else
1798 buffer->flags &= ~RB_FL_OVERWRITE;
1799 mutex_unlock(&buffer->mutex);
1800}
1801EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1802
1803static __always_inline void *
1804__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1805{
1806 return bpage->data + index;
1807}
1808
1809static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1810{
1811 return bpage->page->data + index;
1812}
1813
1814static __always_inline struct ring_buffer_event *
1815rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1816{
1817 return __rb_page_index(cpu_buffer->reader_page,
1818 cpu_buffer->reader_page->read);
1819}
1820
1821static __always_inline struct ring_buffer_event *
1822rb_iter_head_event(struct ring_buffer_iter *iter)
1823{
1824 return __rb_page_index(iter->head_page, iter->head);
1825}
1826
1827static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
1828{
1829 return local_read(&bpage->page->commit);
1830}
1831
1832
1833static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
1834{
1835 return rb_page_commit(bpage);
1836}
1837
1838static __always_inline unsigned
1839rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1840{
1841 return rb_page_commit(cpu_buffer->commit_page);
1842}
1843
1844static __always_inline unsigned
1845rb_event_index(struct ring_buffer_event *event)
1846{
1847 unsigned long addr = (unsigned long)event;
1848
1849 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1850}
1851
1852static void rb_inc_iter(struct ring_buffer_iter *iter)
1853{
1854 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1855
1856
1857
1858
1859
1860
1861
1862 if (iter->head_page == cpu_buffer->reader_page)
1863 iter->head_page = rb_set_head_page(cpu_buffer);
1864 else
1865 rb_inc_page(cpu_buffer, &iter->head_page);
1866
1867 iter->read_stamp = iter->head_page->page->time_stamp;
1868 iter->head = 0;
1869}
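
/*
 * rb_handle_head_page - writer hit the head page
 *
 * Returns: +1 to retry page
 *           0 to continue
 *          -1 on error
 */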
1878static int
1879rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1880 struct buffer_page *tail_page,
1881 struct buffer_page *next_page)
1882{
1883 struct buffer_page *new_head;
1884 int entries;
1885 int type;
1886 int ret;
1887
1888 entries = rb_page_entries(next_page);
1889
1890
1891
1892
1893
1894
1895 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1896 RB_PAGE_HEAD);
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909 switch (type) {
1910 case RB_PAGE_HEAD:
1911
1912
1913
1914
1915
1916 local_add(entries, &cpu_buffer->overrun);
1917 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1918
1919
1920
1921
1922
1923
1924
1925 break;
1926
1927 case RB_PAGE_UPDATE:
1928
1929
1930
1931
1932 break;
1933 case RB_PAGE_NORMAL:
1934
1935
1936
1937
1938
1939 return 1;
1940 case RB_PAGE_MOVED:
1941
1942
1943
1944
1945
1946 return 1;
1947 default:
1948 RB_WARN_ON(cpu_buffer, 1);
1949 return -1;
1950 }
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966 new_head = next_page;
1967 rb_inc_page(cpu_buffer, &new_head);
1968
1969 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1970 RB_PAGE_NORMAL);
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980 switch (ret) {
1981 case RB_PAGE_HEAD:
1982 case RB_PAGE_NORMAL:
1983
1984 break;
1985 default:
1986 RB_WARN_ON(cpu_buffer, 1);
1987 return -1;
1988 }
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000 if (ret == RB_PAGE_NORMAL) {
2001 struct buffer_page *buffer_tail_page;
2002
2003 buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
2004
2005
2006
2007
2008 if (buffer_tail_page != tail_page &&
2009 buffer_tail_page != next_page)
2010 rb_head_page_set_normal(cpu_buffer, new_head,
2011 next_page,
2012 RB_PAGE_HEAD);
2013 }
2014
2015
2016
2017
2018
2019
2020 if (type == RB_PAGE_HEAD) {
2021 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2022 tail_page,
2023 RB_PAGE_UPDATE);
2024 if (RB_WARN_ON(cpu_buffer,
2025 ret != RB_PAGE_UPDATE))
2026 return -1;
2027 }
2028
2029 return 0;
2030}
2031
2032static inline void
2033rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2034 unsigned long tail, struct rb_event_info *info)
2035{
2036 struct buffer_page *tail_page = info->tail_page;
2037 struct ring_buffer_event *event;
2038 unsigned long length = info->length;
2039
2040
2041
2042
2043
2044 if (tail >= BUF_PAGE_SIZE) {
2045
2046
2047
2048
2049
2050 if (tail == BUF_PAGE_SIZE)
2051 tail_page->real_end = 0;
2052
2053 local_sub(length, &tail_page->write);
2054 return;
2055 }
2056
2057 event = __rb_page_index(tail_page, tail);
2058 kmemcheck_annotate_bitfield(event, bitfield);
2059
2060
2061 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2062
2063
2064
2065
2066
2067
2068 tail_page->real_end = tail;
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2082
2083
2084
2085 rb_event_set_padding(event);
2086
2087
2088 local_sub(length, &tail_page->write);
2089 return;
2090 }
2091
2092
2093 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2094 event->type_len = RINGBUF_TYPE_PADDING;
2095
2096 event->time_delta = 1;
2097
2098
2099 length = (tail + length) - BUF_PAGE_SIZE;
2100 local_sub(length, &tail_page->write);
2101}
2102
2103static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
2104
2105
2106
2107
2108static noinline struct ring_buffer_event *
2109rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2110 unsigned long tail, struct rb_event_info *info)
2111{
2112 struct buffer_page *tail_page = info->tail_page;
2113 struct buffer_page *commit_page = cpu_buffer->commit_page;
2114 struct ring_buffer *buffer = cpu_buffer->buffer;
2115 struct buffer_page *next_page;
2116 int ret;
2117
2118 next_page = tail_page;
2119
2120 rb_inc_page(cpu_buffer, &next_page);
2121
2122
2123
2124
2125
2126
2127 if (unlikely(next_page == commit_page)) {
2128 local_inc(&cpu_buffer->commit_overrun);
2129 goto out_reset;
2130 }
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2147
2148
2149
2150
2151
2152 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2153
2154
2155
2156
2157 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2158 local_inc(&cpu_buffer->dropped_events);
2159 goto out_reset;
2160 }
2161
2162 ret = rb_handle_head_page(cpu_buffer,
2163 tail_page,
2164 next_page);
2165 if (ret < 0)
2166 goto out_reset;
2167 if (ret)
2168 goto out_again;
2169 } else {
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180 if (unlikely((cpu_buffer->commit_page !=
2181 cpu_buffer->tail_page) &&
2182 (cpu_buffer->commit_page ==
2183 cpu_buffer->reader_page))) {
2184 local_inc(&cpu_buffer->commit_overrun);
2185 goto out_reset;
2186 }
2187 }
2188 }
2189
2190 rb_tail_page_update(cpu_buffer, tail_page, next_page);
2191
2192 out_again:
2193
2194 rb_reset_tail(cpu_buffer, tail, info);
2195
2196
2197 rb_end_commit(cpu_buffer);
2198
2199 local_inc(&cpu_buffer->committing);
2200
2201
2202 return ERR_PTR(-EAGAIN);
2203
2204 out_reset:
2205
2206 rb_reset_tail(cpu_buffer, tail, info);
2207
2208 return NULL;
2209}
2210
2211
2212static noinline struct ring_buffer_event *
2213rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
2214{
2215 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2216
2217
2218 if (rb_event_index(event)) {
2219 event->time_delta = delta & TS_MASK;
2220 event->array[0] = delta >> TS_SHIFT;
2221 } else {
2222
2223 event->time_delta = 0;
2224 event->array[0] = 0;
2225 }
2226
2227 return skip_time_extend(event);
2228}
2229
2230static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2231 struct ring_buffer_event *event);
2232
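
/*
 * rb_update_event - update event type and data
 *
 * Update the type and data fields of the event. The length of the event
 * determines the type: if the data length fits in the type_len field it
 * is encoded there, otherwise type_len is zero and the length is stored
 * in array[0]. A time-extend sub-event is prepended when
 * info->add_timestamp is set.
 */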
2244static void
2245rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2246 struct ring_buffer_event *event,
2247 struct rb_event_info *info)
2248{
2249 unsigned length = info->length;
2250 u64 delta = info->delta;
2251
2252
2253 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2254 delta = 0;
2255
2256
2257
2258
2259
2260 if (unlikely(info->add_timestamp)) {
2261 event = rb_add_time_stamp(event, delta);
2262 length -= RB_LEN_TIME_EXTEND;
2263 delta = 0;
2264 }
2265
2266 event->time_delta = delta;
2267 length -= RB_EVNT_HDR_SIZE;
2268 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2269 event->type_len = 0;
2270 event->array[0] = length;
2271 } else
2272 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2273}
2274
2275static unsigned rb_calculate_event_length(unsigned length)
2276{
2277 struct ring_buffer_event event;
2278
2279
2280 if (!length)
2281 length++;
2282
2283 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2284 length += sizeof(event.array[0]);
2285
2286 length += RB_EVNT_HDR_SIZE;
2287 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2302 length += RB_ALIGNMENT;
2303
2304 return length;
2305}
2306
2307#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2308static inline bool sched_clock_stable(void)
2309{
2310 return true;
2311}
2312#endif
2313
2314static inline int
2315rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2316 struct ring_buffer_event *event)
2317{
2318 unsigned long new_index, old_index;
2319 struct buffer_page *bpage;
2320 unsigned long index;
2321 unsigned long addr;
2322
2323 new_index = rb_event_index(event);
2324 old_index = new_index + rb_event_ts_length(event);
2325 addr = (unsigned long)event;
2326 addr &= PAGE_MASK;
2327
2328 bpage = READ_ONCE(cpu_buffer->tail_page);
2329
2330 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2331 unsigned long write_mask =
2332 local_read(&bpage->write) & ~RB_WRITE_MASK;
2333 unsigned long event_length = rb_event_length(event);
2334
2335
2336
2337
2338
2339
2340 old_index += write_mask;
2341 new_index += write_mask;
2342 index = local_cmpxchg(&bpage->write, old_index, new_index);
2343 if (index == old_index) {
2344
2345 local_sub(event_length, &cpu_buffer->entries_bytes);
2346 return 1;
2347 }
2348 }
2349
2350
2351 return 0;
2352}
2353
2354static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2355{
2356 local_inc(&cpu_buffer->committing);
2357 local_inc(&cpu_buffer->commits);
2358}
2359
2360static __always_inline void
2361rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2362{
2363 unsigned long max_count;
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373 again:
2374 max_count = cpu_buffer->nr_pages * 100;
2375
2376 while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2377 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2378 return;
2379 if (RB_WARN_ON(cpu_buffer,
2380 rb_is_reader_page(cpu_buffer->tail_page)))
2381 return;
2382 local_set(&cpu_buffer->commit_page->page->commit,
2383 rb_page_write(cpu_buffer->commit_page));
2384 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
2385
2386 if (rb_page_write(cpu_buffer->commit_page))
2387 cpu_buffer->write_stamp =
2388 cpu_buffer->commit_page->page->time_stamp;
2389
2390 barrier();
2391 }
2392 while (rb_commit_index(cpu_buffer) !=
2393 rb_page_write(cpu_buffer->commit_page)) {
2394
2395 local_set(&cpu_buffer->commit_page->page->commit,
2396 rb_page_write(cpu_buffer->commit_page));
2397 RB_WARN_ON(cpu_buffer,
2398 local_read(&cpu_buffer->commit_page->page->commit) &
2399 ~RB_WRITE_MASK);
2400 barrier();
2401 }
2402
2403
2404 barrier();
2405
2406
2407
2408
2409
2410
2411 if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
2412 goto again;
2413}
2414
2415static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2416{
2417 unsigned long commits;
2418
2419 if (RB_WARN_ON(cpu_buffer,
2420 !local_read(&cpu_buffer->committing)))
2421 return;
2422
2423 again:
2424 commits = local_read(&cpu_buffer->commits);
2425
2426 barrier();
2427 if (local_read(&cpu_buffer->committing) == 1)
2428 rb_set_commit_to_write(cpu_buffer);
2429
2430 local_dec(&cpu_buffer->committing);
2431
2432
2433 barrier();
2434
2435
2436
2437
2438
2439
2440 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2441 !local_read(&cpu_buffer->committing)) {
2442 local_inc(&cpu_buffer->committing);
2443 goto again;
2444 }
2445}
2446
2447static inline void rb_event_discard(struct ring_buffer_event *event)
2448{
2449 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2450 event = skip_time_extend(event);
2451
2452
2453 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2454 event->type_len = RINGBUF_TYPE_PADDING;
2455
2456 if (!event->time_delta)
2457 event->time_delta = 1;
2458}
2459
2460static __always_inline bool
2461rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2462 struct ring_buffer_event *event)
2463{
2464 unsigned long addr = (unsigned long)event;
2465 unsigned long index;
2466
2467 index = rb_event_index(event);
2468 addr &= PAGE_MASK;
2469
2470 return cpu_buffer->commit_page->page == (void *)addr &&
2471 rb_commit_index(cpu_buffer) == index;
2472}
2473
2474static __always_inline void
2475rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2476 struct ring_buffer_event *event)
2477{
2478 u64 delta;
2479
2480
2481
2482
2483
2484 if (rb_event_is_commit(cpu_buffer, event)) {
2485
2486
2487
2488
2489 if (!rb_event_index(event))
2490 cpu_buffer->write_stamp =
2491 cpu_buffer->commit_page->page->time_stamp;
2492 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2493 delta = event->array[0];
2494 delta <<= TS_SHIFT;
2495 delta += event->time_delta;
2496 cpu_buffer->write_stamp += delta;
2497 } else
2498 cpu_buffer->write_stamp += event->time_delta;
2499 }
2500}
2501
2502static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2503 struct ring_buffer_event *event)
2504{
2505 local_inc(&cpu_buffer->entries);
2506 rb_update_write_stamp(cpu_buffer, event);
2507 rb_end_commit(cpu_buffer);
2508}
2509
2510static __always_inline void
2511rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2512{
2513 bool pagebusy;
2514
2515 if (buffer->irq_work.waiters_pending) {
2516 buffer->irq_work.waiters_pending = false;
2517
2518 irq_work_queue(&buffer->irq_work.work);
2519 }
2520
2521 if (cpu_buffer->irq_work.waiters_pending) {
2522 cpu_buffer->irq_work.waiters_pending = false;
2523
2524 irq_work_queue(&cpu_buffer->irq_work.work);
2525 }
2526
2527 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2528
2529 if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2530 cpu_buffer->irq_work.wakeup_full = true;
2531 cpu_buffer->irq_work.full_waiters_pending = false;
2532
2533 irq_work_queue(&cpu_buffer->irq_work.work);
2534 }
2535}
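
/*
 * The lock-less nature of the ring buffer means that a writer can be
 * interrupted by another writer from a higher context (NORMAL ->
 * SOFTIRQ -> IRQ -> NMI) and both can safely use the same per-cpu
 * buffer, because the interrupting write always completes before the
 * interrupted one resumes.
 *
 * What must be prevented is recursion within the *same* context (for
 * example a tracer tracing itself).  trace_recursive_lock() sets a bit
 * in cpu_buffer->current_context for the current context and fails if
 * that bit is already set, in which case the event is dropped.
 */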
2575static __always_inline int
2576trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2577{
2578 unsigned int val = cpu_buffer->current_context;
2579 int bit;
2580
2581 if (in_interrupt()) {
2582 if (in_nmi())
2583 bit = RB_CTX_NMI;
2584 else if (in_irq())
2585 bit = RB_CTX_IRQ;
2586 else
2587 bit = RB_CTX_SOFTIRQ;
2588 } else
2589 bit = RB_CTX_NORMAL;
2590
2591 if (unlikely(val & (1 << bit)))
2592 return 1;
2593
2594 val |= (1 << bit);
2595 cpu_buffer->current_context = val;
2596
2597 return 0;
2598}
2599
2600static __always_inline void
2601trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2602{
2603 cpu_buffer->current_context &= cpu_buffer->current_context - 1;
2604}
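
/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */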
2615int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2616 struct ring_buffer_event *event)
2617{
2618 struct ring_buffer_per_cpu *cpu_buffer;
2619 int cpu = raw_smp_processor_id();
2620
2621 cpu_buffer = buffer->buffers[cpu];
2622
2623 rb_commit(cpu_buffer, event);
2624
2625 rb_wakeups(buffer, cpu_buffer);
2626
2627 trace_recursive_unlock(cpu_buffer);
2628
2629 preempt_enable_notrace();
2630
2631 return 0;
2632}
2633EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2634
2635static noinline void
2636rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2637 struct rb_event_info *info)
2638{
2639 WARN_ONCE(info->delta > (1ULL << 59),
2640 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2641 (unsigned long long)info->delta,
2642 (unsigned long long)info->ts,
2643 (unsigned long long)cpu_buffer->write_stamp,
2644 sched_clock_stable() ? "" :
2645 "If you just came from a suspend/resume,\n"
2646 "please switch to the trace global clock:\n"
2647 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2648 info->add_timestamp = 1;
2649}
2650
2651static struct ring_buffer_event *
2652__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2653 struct rb_event_info *info)
2654{
2655 struct ring_buffer_event *event;
2656 struct buffer_page *tail_page;
2657 unsigned long tail, write;
2658
2659
2660
2661
2662
2663
2664 if (unlikely(info->add_timestamp))
2665 info->length += RB_LEN_TIME_EXTEND;
2666
2667
2668 tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
2669 write = local_add_return(info->length, &tail_page->write);
2670
2671
2672 write &= RB_WRITE_MASK;
2673 tail = write - info->length;
2674
2675
2676
2677
2678
2679 if (!tail)
2680 info->delta = 0;
2681
2682
2683 if (unlikely(write > BUF_PAGE_SIZE))
2684 return rb_move_tail(cpu_buffer, tail, info);
2685
2686
2687
2688 event = __rb_page_index(tail_page, tail);
2689 kmemcheck_annotate_bitfield(event, bitfield);
2690 rb_update_event(cpu_buffer, event, info);
2691
2692 local_inc(&tail_page->entries);
2693
2694
2695
2696
2697
2698 if (!tail)
2699 tail_page->page->time_stamp = info->ts;
2700
2701
2702 local_add(info->length, &cpu_buffer->entries_bytes);
2703
2704 return event;
2705}
2706
2707static __always_inline struct ring_buffer_event *
2708rb_reserve_next_event(struct ring_buffer *buffer,
2709 struct ring_buffer_per_cpu *cpu_buffer,
2710 unsigned long length)
2711{
2712 struct ring_buffer_event *event;
2713 struct rb_event_info info;
2714 int nr_loops = 0;
2715 u64 diff;
2716
2717 rb_start_commit(cpu_buffer);
2718
2719#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2720
2721
2722
2723
2724
2725
2726 barrier();
2727 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2728 local_dec(&cpu_buffer->committing);
2729 local_dec(&cpu_buffer->commits);
2730 return NULL;
2731 }
2732#endif
2733
2734 info.length = rb_calculate_event_length(length);
2735 again:
2736 info.add_timestamp = 0;
2737 info.delta = 0;
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2749 goto out_fail;
2750
2751 info.ts = rb_time_stamp(cpu_buffer->buffer);
2752 diff = info.ts - cpu_buffer->write_stamp;
2753
2754
2755 barrier();
2756
2757
2758 if (likely(info.ts >= cpu_buffer->write_stamp)) {
2759 info.delta = diff;
2760 if (unlikely(test_time_stamp(info.delta)))
2761 rb_handle_timestamp(cpu_buffer, &info);
2762 }
2763
2764 event = __rb_reserve_next(cpu_buffer, &info);
2765
2766 if (unlikely(PTR_ERR(event) == -EAGAIN)) {
2767 if (info.add_timestamp)
2768 info.length -= RB_LEN_TIME_EXTEND;
2769 goto again;
2770 }
2771
2772 if (!event)
2773 goto out_fail;
2774
2775 return event;
2776
2777 out_fail:
2778 rb_end_commit(cpu_buffer);
2779 return NULL;
2780}
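
/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
 * If NULL is returned, then nothing has been allocated or locked.
 */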
2797struct ring_buffer_event *
2798ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2799{
2800 struct ring_buffer_per_cpu *cpu_buffer;
2801 struct ring_buffer_event *event;
2802 int cpu;
2803
2804
2805 preempt_disable_notrace();
2806
2807 if (unlikely(atomic_read(&buffer->record_disabled)))
2808 goto out;
2809
2810 cpu = raw_smp_processor_id();
2811
2812 if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
2813 goto out;
2814
2815 cpu_buffer = buffer->buffers[cpu];
2816
2817 if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
2818 goto out;
2819
2820 if (unlikely(length > BUF_MAX_DATA_SIZE))
2821 goto out;
2822
2823 if (unlikely(trace_recursive_lock(cpu_buffer)))
2824 goto out;
2825
2826 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2827 if (!event)
2828 goto out_unlock;
2829
2830 return event;
2831
2832 out_unlock:
2833 trace_recursive_unlock(cpu_buffer);
2834 out:
2835 preempt_enable_notrace();
2836 return NULL;
2837}
2838EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
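
/*
 * Decrement the entry counter of the page that @event is on.  The event
 * itself does not need to still exist; only the pointer to the page it
 * was written to is used.  This may only be called before the commit
 * takes place.
 */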
2846static inline void
2847rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2848 struct ring_buffer_event *event)
2849{
2850 unsigned long addr = (unsigned long)event;
2851 struct buffer_page *bpage = cpu_buffer->commit_page;
2852 struct buffer_page *start;
2853
2854 addr &= PAGE_MASK;
2855
2856
2857 if (likely(bpage->page == (void *)addr)) {
2858 local_dec(&bpage->entries);
2859 return;
2860 }
2861
2862
2863
2864
2865
2866 rb_inc_page(cpu_buffer, &bpage);
2867 start = bpage;
2868 do {
2869 if (bpage->page == (void *)addr) {
2870 local_dec(&bpage->entries);
2871 return;
2872 }
2873 rb_inc_page(cpu_buffer, &bpage);
2874 } while (bpage != start);
2875
2876
2877 RB_WARN_ON(cpu_buffer, 1);
2878}
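
/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * Sometimes an event that is in the ring buffer needs to be ignored.
 * This function lets the user discard an event in the ring buffer
 * and then that event will not be read later.
 *
 * This function only works if it is called before the item has been
 * committed. It will try to free the event from the ring buffer
 * if another event has not been added behind it.
 *
 * If another event has been added behind it, it will set the event
 * up as discarded, and perform the commit.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */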
2899void ring_buffer_discard_commit(struct ring_buffer *buffer,
2900 struct ring_buffer_event *event)
2901{
2902 struct ring_buffer_per_cpu *cpu_buffer;
2903 int cpu;
2904
2905
2906 rb_event_discard(event);
2907
2908 cpu = smp_processor_id();
2909 cpu_buffer = buffer->buffers[cpu];
2910
2911
2912
2913
2914
2915
2916 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
2917
2918 rb_decrement_entry(cpu_buffer, event);
2919 if (rb_try_to_discard(cpu_buffer, event))
2920 goto out;
2921
2922
2923
2924
2925
2926 rb_update_write_stamp(cpu_buffer, event);
2927 out:
2928 rb_end_commit(cpu_buffer);
2929
2930 trace_recursive_unlock(cpu_buffer);
2931
2932 preempt_enable_notrace();
2933
2934}
2935EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
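/**
 * ring_buffer_write - write data to the buffer without explicit reserving
 * @buffer: the ring buffer to write to
 * @length: the length of the data being written (excluding the event header)
 * @data: the data to write to the buffer
 *
 * Combines ring_buffer_lock_reserve() and the commit into one call and
 * copies @data into the reserved event. Returns 0 on success and -EBUSY
 * if the event could not be written.
 *
 * For example (sketch only):
 *
 *	u64 stamp = 1234;
 *
 *	if (ring_buffer_write(buffer, sizeof(stamp), &stamp))
 *		pr_debug("ring buffer write failed\n");
 */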
2950int ring_buffer_write(struct ring_buffer *buffer,
2951 unsigned long length,
2952 void *data)
2953{
2954 struct ring_buffer_per_cpu *cpu_buffer;
2955 struct ring_buffer_event *event;
2956 void *body;
2957 int ret = -EBUSY;
2958 int cpu;
2959
2960 preempt_disable_notrace();
2961
2962 if (atomic_read(&buffer->record_disabled))
2963 goto out;
2964
2965 cpu = raw_smp_processor_id();
2966
2967 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2968 goto out;
2969
2970 cpu_buffer = buffer->buffers[cpu];
2971
2972 if (atomic_read(&cpu_buffer->record_disabled))
2973 goto out;
2974
2975 if (length > BUF_MAX_DATA_SIZE)
2976 goto out;
2977
2978 if (unlikely(trace_recursive_lock(cpu_buffer)))
2979 goto out;
2980
2981 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2982 if (!event)
2983 goto out_unlock;
2984
2985 body = rb_event_data(event);
2986
2987 memcpy(body, data, length);
2988
2989 rb_commit(cpu_buffer, event);
2990
2991 rb_wakeups(buffer, cpu_buffer);
2992
2993 ret = 0;
2994
2995 out_unlock:
2996 trace_recursive_unlock(cpu_buffer);
2997
2998 out:
2999 preempt_enable_notrace();
3000
3001 return ret;
3002}
3003EXPORT_SYMBOL_GPL(ring_buffer_write);
3004
3005static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3006{
3007 struct buffer_page *reader = cpu_buffer->reader_page;
3008 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3009 struct buffer_page *commit = cpu_buffer->commit_page;
3010
3011
3012 if (unlikely(!head))
3013 return true;
3014
3015 return reader->read == rb_page_commit(reader) &&
3016 (commit == reader ||
3017 (commit == head &&
3018 head->read == rb_page_commit(commit)));
3019}
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
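/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: the ring buffer to stop writes to
 *
 * Prevents all writes to the buffer; any attempt to write after this
 * returns NULL. The disable is counted, so every call must be paired
 * with a ring_buffer_record_enable(). Callers that must wait for
 * in-flight writers typically follow this with synchronize_sched().
 */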
3030void ring_buffer_record_disable(struct ring_buffer *buffer)
3031{
3032 atomic_inc(&buffer->record_disabled);
3033}
3034EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3035
3036
3037
3038
3039
3040
3041
3042
3043void ring_buffer_record_enable(struct ring_buffer *buffer)
3044{
3045 atomic_dec(&buffer->record_disabled);
3046}
3047EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
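/**
 * ring_buffer_record_off - switch off all writes into the buffer
 * @buffer: the ring buffer to stop writes to
 *
 * Unlike ring_buffer_record_disable(), this is an on/off switch rather
 * than a counter: it sets the RB_BUFFER_OFF bit with a cmpxchg loop so
 * it composes with the counting disable/enable calls, and only
 * ring_buffer_record_on() undoes it.
 */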
3060void ring_buffer_record_off(struct ring_buffer *buffer)
3061{
3062 unsigned int rd;
3063 unsigned int new_rd;
3064
3065 do {
3066 rd = atomic_read(&buffer->record_disabled);
3067 new_rd = rd | RB_BUFFER_OFF;
3068 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3069}
3070EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
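/**
 * ring_buffer_record_on - switch writes into the buffer back on
 * @buffer: the ring buffer to allow writes to
 *
 * Clears the RB_BUFFER_OFF bit set by ring_buffer_record_off(). Writes
 * remain blocked while any counting ring_buffer_record_disable() is
 * still outstanding.
 */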
3083void ring_buffer_record_on(struct ring_buffer *buffer)
3084{
3085 unsigned int rd;
3086 unsigned int new_rd;
3087
3088 do {
3089 rd = atomic_read(&buffer->record_disabled);
3090 new_rd = rd & ~RB_BUFFER_OFF;
3091 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3092}
3093EXPORT_SYMBOL_GPL(ring_buffer_record_on);
3094
3095
3096
3097
3098
3099
3100
3101int ring_buffer_record_is_on(struct ring_buffer *buffer)
3102{
3103 return !atomic_read(&buffer->record_disabled);
3104}
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
3117{
3118 struct ring_buffer_per_cpu *cpu_buffer;
3119
3120 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3121 return;
3122
3123 cpu_buffer = buffer->buffers[cpu];
3124 atomic_inc(&cpu_buffer->record_disabled);
3125}
3126EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
3137{
3138 struct ring_buffer_per_cpu *cpu_buffer;
3139
3140 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3141 return;
3142
3143 cpu_buffer = buffer->buffers[cpu];
3144 atomic_dec(&cpu_buffer->record_disabled);
3145}
3146EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
3147
3148
3149
3150
3151
3152
3153
3154static inline unsigned long
3155rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3156{
3157 return local_read(&cpu_buffer->entries) -
3158 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3159}
3160
3161
3162
3163
3164
3165
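/**
 * ring_buffer_oldest_event_ts - timestamp of the oldest unread page
 * @buffer: the ring buffer
 * @cpu: the per-CPU buffer to read from
 *
 * Returns the time stamp recorded on the oldest unread page of the
 * @cpu buffer, or 0 if @cpu is not part of the buffer's cpumask.
 */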
3166u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
3167{
3168 unsigned long flags;
3169 struct ring_buffer_per_cpu *cpu_buffer;
3170 struct buffer_page *bpage;
3171 u64 ret = 0;
3172
3173 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3174 return 0;
3175
3176 cpu_buffer = buffer->buffers[cpu];
3177 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3178
3179
3180
3181
3182 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3183 bpage = cpu_buffer->reader_page;
3184 else
3185 bpage = rb_set_head_page(cpu_buffer);
3186 if (bpage)
3187 ret = bpage->page->time_stamp;
3188 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3189
3190 return ret;
3191}
3192EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3193
3194
3195
3196
3197
3198
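/**
 * ring_buffer_bytes_cpu - bytes currently held in a CPU buffer
 * @buffer: the ring buffer
 * @cpu: the per-CPU buffer to read from
 *
 * Returns bytes written minus bytes already read: the amount of data
 * still waiting to be consumed on @cpu.
 */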
3199unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
3200{
3201 struct ring_buffer_per_cpu *cpu_buffer;
3202 unsigned long ret;
3203
3204 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3205 return 0;
3206
3207 cpu_buffer = buffer->buffers[cpu];
3208 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3209
3210 return ret;
3211}
3212EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3213
3214
3215
3216
3217
3218
3219unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
3220{
3221 struct ring_buffer_per_cpu *cpu_buffer;
3222
3223 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3224 return 0;
3225
3226 cpu_buffer = buffer->buffers[cpu];
3227
3228 return rb_num_of_entries(cpu_buffer);
3229}
3230EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3231
3232
3233
3234
3235
3236
3237
3238unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
3239{
3240 struct ring_buffer_per_cpu *cpu_buffer;
3241 unsigned long ret;
3242
3243 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3244 return 0;
3245
3246 cpu_buffer = buffer->buffers[cpu];
3247 ret = local_read(&cpu_buffer->overrun);
3248
3249 return ret;
3250}
3251EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3252
3253
3254
3255
3256
3257
3258
3259
3260unsigned long
3261ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3262{
3263 struct ring_buffer_per_cpu *cpu_buffer;
3264 unsigned long ret;
3265
3266 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3267 return 0;
3268
3269 cpu_buffer = buffer->buffers[cpu];
3270 ret = local_read(&cpu_buffer->commit_overrun);
3271
3272 return ret;
3273}
3274EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3275
3276
3277
3278
3279
3280
3281
3282unsigned long
3283ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3284{
3285 struct ring_buffer_per_cpu *cpu_buffer;
3286 unsigned long ret;
3287
3288 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3289 return 0;
3290
3291 cpu_buffer = buffer->buffers[cpu];
3292 ret = local_read(&cpu_buffer->dropped_events);
3293
3294 return ret;
3295}
3296EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3297
3298
3299
3300
3301
3302
3303unsigned long
3304ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3305{
3306 struct ring_buffer_per_cpu *cpu_buffer;
3307
3308 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3309 return 0;
3310
3311 cpu_buffer = buffer->buffers[cpu];
3312 return cpu_buffer->read;
3313}
3314EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3315
3316
3317
3318
3319
3320
3321
3322
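/**
 * ring_buffer_entries - number of unread entries in the whole buffer
 * @buffer: the ring buffer
 *
 * Sums rb_num_of_entries() over every per-CPU buffer: entries written
 * minus those overwritten or already read.
 */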
3323unsigned long ring_buffer_entries(struct ring_buffer *buffer)
3324{
3325 struct ring_buffer_per_cpu *cpu_buffer;
3326 unsigned long entries = 0;
3327 int cpu;
3328
3329
3330 for_each_buffer_cpu(buffer, cpu) {
3331 cpu_buffer = buffer->buffers[cpu];
3332 entries += rb_num_of_entries(cpu_buffer);
3333 }
3334
3335 return entries;
3336}
3337EXPORT_SYMBOL_GPL(ring_buffer_entries);
3338
3339
3340
3341
3342
3343
3344
3345
3346unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
3347{
3348 struct ring_buffer_per_cpu *cpu_buffer;
3349 unsigned long overruns = 0;
3350 int cpu;
3351
3352
3353 for_each_buffer_cpu(buffer, cpu) {
3354 cpu_buffer = buffer->buffers[cpu];
3355 overruns += local_read(&cpu_buffer->overrun);
3356 }
3357
3358 return overruns;
3359}
3360EXPORT_SYMBOL_GPL(ring_buffer_overruns);
3361
3362static void rb_iter_reset(struct ring_buffer_iter *iter)
3363{
3364 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3365
3366
3367 iter->head_page = cpu_buffer->reader_page;
3368 iter->head = cpu_buffer->reader_page->read;
3369
3370 iter->cache_reader_page = iter->head_page;
3371 iter->cache_read = cpu_buffer->read;
3372
3373 if (iter->head)
3374 iter->read_stamp = cpu_buffer->read_stamp;
3375 else
3376 iter->read_stamp = iter->head_page->page->time_stamp;
3377}
3378
3379
3380
3381
3382
3383
3384
3385
3386void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3387{
3388 struct ring_buffer_per_cpu *cpu_buffer;
3389 unsigned long flags;
3390
3391 if (!iter)
3392 return;
3393
3394 cpu_buffer = iter->cpu_buffer;
3395
3396 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3397 rb_iter_reset(iter);
3398 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3399}
3400EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3401
3402
3403
3404
3405
3406int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3407{
3408 struct ring_buffer_per_cpu *cpu_buffer;
3409 struct buffer_page *reader;
3410 struct buffer_page *head_page;
3411 struct buffer_page *commit_page;
3412 unsigned commit;
3413
3414 cpu_buffer = iter->cpu_buffer;
3415
3416
3417 reader = cpu_buffer->reader_page;
3418 head_page = cpu_buffer->head_page;
3419 commit_page = cpu_buffer->commit_page;
3420 commit = rb_page_commit(commit_page);
3421
3422 return ((iter->head_page == commit_page && iter->head == commit) ||
3423 (iter->head_page == reader && commit_page == head_page &&
3424 head_page->read == commit &&
3425 iter->head == rb_page_commit(cpu_buffer->reader_page)));
3426}
3427EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3428
3429static void
3430rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3431 struct ring_buffer_event *event)
3432{
3433 u64 delta;
3434
3435 switch (event->type_len) {
3436 case RINGBUF_TYPE_PADDING:
3437 return;
3438
3439 case RINGBUF_TYPE_TIME_EXTEND:
3440 delta = event->array[0];
3441 delta <<= TS_SHIFT;
3442 delta += event->time_delta;
3443 cpu_buffer->read_stamp += delta;
3444 return;
3445
3446 case RINGBUF_TYPE_TIME_STAMP:
3447
3448 return;
3449
3450 case RINGBUF_TYPE_DATA:
3451 cpu_buffer->read_stamp += event->time_delta;
3452 return;
3453
3454 default:
3455 BUG();
3456 }
3457 return;
3458}
3459
3460static void
3461rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3462 struct ring_buffer_event *event)
3463{
3464 u64 delta;
3465
3466 switch (event->type_len) {
3467 case RINGBUF_TYPE_PADDING:
3468 return;
3469
3470 case RINGBUF_TYPE_TIME_EXTEND:
3471 delta = event->array[0];
3472 delta <<= TS_SHIFT;
3473 delta += event->time_delta;
3474 iter->read_stamp += delta;
3475 return;
3476
3477 case RINGBUF_TYPE_TIME_STAMP:
3478
3479 return;
3480
3481 case RINGBUF_TYPE_DATA:
3482 iter->read_stamp += event->time_delta;
3483 return;
3484
3485 default:
3486 BUG();
3487 }
3488 return;
3489}
3490
3491static struct buffer_page *
3492rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3493{
3494 struct buffer_page *reader = NULL;
3495 unsigned long overwrite;
3496 unsigned long flags;
3497 int nr_loops = 0;
3498 int ret;
3499
3500 local_irq_save(flags);
3501 arch_spin_lock(&cpu_buffer->lock);
3502
3503 again:
3504
3505
3506
3507
3508
3509
3510 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3511 reader = NULL;
3512 goto out;
3513 }
3514
3515 reader = cpu_buffer->reader_page;
3516
3517
3518 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3519 goto out;
3520
3521
3522 if (RB_WARN_ON(cpu_buffer,
3523 cpu_buffer->reader_page->read > rb_page_size(reader)))
3524 goto out;
3525
3526
3527 reader = NULL;
3528 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3529 goto out;
3530
3531
3532 if (rb_num_of_entries(cpu_buffer) == 0)
3533 goto out;
3534
3535
3536
3537
3538 local_set(&cpu_buffer->reader_page->write, 0);
3539 local_set(&cpu_buffer->reader_page->entries, 0);
3540 local_set(&cpu_buffer->reader_page->page->commit, 0);
3541 cpu_buffer->reader_page->real_end = 0;
3542
3543 spin:
3544
3545
3546
3547 reader = rb_set_head_page(cpu_buffer);
3548 if (!reader)
3549 goto out;
3550 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3551 cpu_buffer->reader_page->list.prev = reader->list.prev;
3552
3553
3554
3555
3556
3557
3558 cpu_buffer->pages = reader->list.prev;
3559
3560
3561 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572 smp_mb();
3573 overwrite = local_read(&(cpu_buffer->overrun));
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3587
3588
3589
3590
3591 if (!ret)
3592 goto spin;
3593
3594
3595
3596
3597
3598
3599 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3600 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3601
3602
3603 cpu_buffer->reader_page = reader;
3604 cpu_buffer->reader_page->read = 0;
3605
3606 if (overwrite != cpu_buffer->last_overrun) {
3607 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3608 cpu_buffer->last_overrun = overwrite;
3609 }
3610
3611 goto again;
3612
3613 out:
3614
3615 if (reader && reader->read == 0)
3616 cpu_buffer->read_stamp = reader->page->time_stamp;
3617
3618 arch_spin_unlock(&cpu_buffer->lock);
3619 local_irq_restore(flags);
3620
3621 return reader;
3622}
3623
3624static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3625{
3626 struct ring_buffer_event *event;
3627 struct buffer_page *reader;
3628 unsigned length;
3629
3630 reader = rb_get_reader_page(cpu_buffer);
3631
3632
3633 if (RB_WARN_ON(cpu_buffer, !reader))
3634 return;
3635
3636 event = rb_reader_event(cpu_buffer);
3637
3638 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3639 cpu_buffer->read++;
3640
3641 rb_update_read_stamp(cpu_buffer, event);
3642
3643 length = rb_event_length(event);
3644 cpu_buffer->reader_page->read += length;
3645}
3646
3647static void rb_advance_iter(struct ring_buffer_iter *iter)
3648{
3649 struct ring_buffer_per_cpu *cpu_buffer;
3650 struct ring_buffer_event *event;
3651 unsigned length;
3652
3653 cpu_buffer = iter->cpu_buffer;
3654
3655
3656
3657
3658 if (iter->head >= rb_page_size(iter->head_page)) {
3659
3660 if (iter->head_page == cpu_buffer->commit_page)
3661 return;
3662 rb_inc_iter(iter);
3663 return;
3664 }
3665
3666 event = rb_iter_head_event(iter);
3667
3668 length = rb_event_length(event);
3669
3670
3671
3672
3673
3674 if (RB_WARN_ON(cpu_buffer,
3675 (iter->head_page == cpu_buffer->commit_page) &&
3676 (iter->head + length > rb_commit_index(cpu_buffer))))
3677 return;
3678
3679 rb_update_iter_read_stamp(iter, event);
3680
3681 iter->head += length;
3682
3683
3684 if ((iter->head >= rb_page_size(iter->head_page)) &&
3685 (iter->head_page != cpu_buffer->commit_page))
3686 rb_inc_iter(iter);
3687}
3688
3689static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3690{
3691 return cpu_buffer->lost_events;
3692}
3693
3694static struct ring_buffer_event *
3695rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3696 unsigned long *lost_events)
3697{
3698 struct ring_buffer_event *event;
3699 struct buffer_page *reader;
3700 int nr_loops = 0;
3701
3702 again:
3703
3704
3705
3706
3707
3708
3709 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3710 return NULL;
3711
3712 reader = rb_get_reader_page(cpu_buffer);
3713 if (!reader)
3714 return NULL;
3715
3716 event = rb_reader_event(cpu_buffer);
3717
3718 switch (event->type_len) {
3719 case RINGBUF_TYPE_PADDING:
3720 if (rb_null_event(event))
3721 RB_WARN_ON(cpu_buffer, 1);
3722
3723
3724
3725
3726
3727
3728
3729
3730 return event;
3731
3732 case RINGBUF_TYPE_TIME_EXTEND:
3733
3734 rb_advance_reader(cpu_buffer);
3735 goto again;
3736
3737 case RINGBUF_TYPE_TIME_STAMP:
3738
3739 rb_advance_reader(cpu_buffer);
3740 goto again;
3741
3742 case RINGBUF_TYPE_DATA:
3743 if (ts) {
3744 *ts = cpu_buffer->read_stamp + event->time_delta;
3745 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3746 cpu_buffer->cpu, ts);
3747 }
3748 if (lost_events)
3749 *lost_events = rb_lost_events(cpu_buffer);
3750 return event;
3751
3752 default:
3753 BUG();
3754 }
3755
3756 return NULL;
3757}
3758EXPORT_SYMBOL_GPL(ring_buffer_peek);
3759
3760static struct ring_buffer_event *
3761rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3762{
3763 struct ring_buffer *buffer;
3764 struct ring_buffer_per_cpu *cpu_buffer;
3765 struct ring_buffer_event *event;
3766 int nr_loops = 0;
3767
3768 cpu_buffer = iter->cpu_buffer;
3769 buffer = cpu_buffer->buffer;
3770
3771
3772
3773
3774
3775
3776 if (unlikely(iter->cache_read != cpu_buffer->read ||
3777 iter->cache_reader_page != cpu_buffer->reader_page))
3778 rb_iter_reset(iter);
3779
3780 again:
3781 if (ring_buffer_iter_empty(iter))
3782 return NULL;
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
3793 return NULL;
3794
3795 if (rb_per_cpu_empty(cpu_buffer))
3796 return NULL;
3797
3798 if (iter->head >= rb_page_size(iter->head_page)) {
3799 rb_inc_iter(iter);
3800 goto again;
3801 }
3802
3803 event = rb_iter_head_event(iter);
3804
3805 switch (event->type_len) {
3806 case RINGBUF_TYPE_PADDING:
3807 if (rb_null_event(event)) {
3808 rb_inc_iter(iter);
3809 goto again;
3810 }
3811 rb_advance_iter(iter);
3812 return event;
3813
3814 case RINGBUF_TYPE_TIME_EXTEND:
3815
3816 rb_advance_iter(iter);
3817 goto again;
3818
3819 case RINGBUF_TYPE_TIME_STAMP:
3820
3821 rb_advance_iter(iter);
3822 goto again;
3823
3824 case RINGBUF_TYPE_DATA:
3825 if (ts) {
3826 *ts = iter->read_stamp + event->time_delta;
3827 ring_buffer_normalize_time_stamp(buffer,
3828 cpu_buffer->cpu, ts);
3829 }
3830 return event;
3831
3832 default:
3833 BUG();
3834 }
3835
3836 return NULL;
3837}
3838EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
3839
3840static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
3841{
3842 if (likely(!in_nmi())) {
3843 raw_spin_lock(&cpu_buffer->reader_lock);
3844 return true;
3845 }
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856 if (raw_spin_trylock(&cpu_buffer->reader_lock))
3857 return true;
3858
3859
3860 atomic_inc(&cpu_buffer->record_disabled);
3861 return false;
3862}
3863
3864static inline void
3865rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
3866{
3867 if (likely(locked))
3868 raw_spin_unlock(&cpu_buffer->reader_lock);
3869 return;
3870}
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
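/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: the ring buffer to read
 * @cpu: the CPU buffer to peek at
 * @ts: where to store the event's timestamp (may be NULL)
 * @lost_events: where to store the number of lost events (may be NULL)
 *
 * Returns the event that will be read next without consuming it, or
 * NULL if the CPU buffer is empty.
 */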
3882struct ring_buffer_event *
3883ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3884 unsigned long *lost_events)
3885{
3886 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3887 struct ring_buffer_event *event;
3888 unsigned long flags;
3889 bool dolock;
3890
3891 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3892 return NULL;
3893
3894 again:
3895 local_irq_save(flags);
3896 dolock = rb_reader_lock(cpu_buffer);
3897 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3898 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3899 rb_advance_reader(cpu_buffer);
3900 rb_reader_unlock(cpu_buffer, dolock);
3901 local_irq_restore(flags);
3902
3903 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3904 goto again;
3905
3906 return event;
3907}
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917struct ring_buffer_event *
3918ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3919{
3920 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3921 struct ring_buffer_event *event;
3922 unsigned long flags;
3923
3924 again:
3925 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3926 event = rb_iter_peek(iter, ts);
3927 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3928
3929 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3930 goto again;
3931
3932 return event;
3933}
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
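/**
 * ring_buffer_consume - return the next event and consume it
 * @buffer: the ring buffer to get the next event from
 * @cpu: the CPU buffer to read from
 * @ts: where to store the event's timestamp (may be NULL)
 * @lost_events: where to store the number of lost events (may be NULL)
 *
 * Returns the next event in the ring buffer and consumes it, so
 * repeated calls keep returning new events until the buffer is
 * drained. A minimal drain loop (sketch only):
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost)))
 *		handle_event(event, ts, lost);
 *
 * where handle_event() stands in for the caller's own processing.
 */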
3946struct ring_buffer_event *
3947ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3948 unsigned long *lost_events)
3949{
3950 struct ring_buffer_per_cpu *cpu_buffer;
3951 struct ring_buffer_event *event = NULL;
3952 unsigned long flags;
3953 bool dolock;
3954
3955 again:
3956
3957 preempt_disable();
3958
3959 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3960 goto out;
3961
3962 cpu_buffer = buffer->buffers[cpu];
3963 local_irq_save(flags);
3964 dolock = rb_reader_lock(cpu_buffer);
3965
3966 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3967 if (event) {
3968 cpu_buffer->lost_events = 0;
3969 rb_advance_reader(cpu_buffer);
3970 }
3971
3972 rb_reader_unlock(cpu_buffer, dolock);
3973 local_irq_restore(flags);
3974
3975 out:
3976 preempt_enable();
3977
3978 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3979 goto again;
3980
3981 return event;
3982}
3983EXPORT_SYMBOL_GPL(ring_buffer_consume);
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
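/**
 * ring_buffer_read_prepare - prepare a non-consuming read of a CPU buffer
 * @buffer: the ring buffer to read from
 * @cpu: the CPU buffer to iterate over
 *
 * Allocates an iterator and disables recording on the CPU buffer and
 * resizing of the whole buffer so the iteration cannot be corrupted.
 * The expected call sequence (sketch, error handling trimmed):
 *
 *	iter = ring_buffer_read_prepare(buffer, cpu);
 *	ring_buffer_read_prepare_sync();
 *	ring_buffer_read_start(iter);
 *	while ((event = ring_buffer_read(iter, &ts)))
 *		handle_event(event, ts);
 *	ring_buffer_read_finish(iter);
 *
 * where handle_event() is a placeholder for the caller's processing.
 */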
4005struct ring_buffer_iter *
4006ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
4007{
4008 struct ring_buffer_per_cpu *cpu_buffer;
4009 struct ring_buffer_iter *iter;
4010
4011 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4012 return NULL;
4013
4014 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
4015 if (!iter)
4016 return NULL;
4017
4018 cpu_buffer = buffer->buffers[cpu];
4019
4020 iter->cpu_buffer = cpu_buffer;
4021
4022 atomic_inc(&buffer->resize_disabled);
4023 atomic_inc(&cpu_buffer->record_disabled);
4024
4025 return iter;
4026}
4027EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
4028
4029
4030
4031
4032
4033
4034
4035
4036void
4037ring_buffer_read_prepare_sync(void)
4038{
4039 synchronize_sched();
4040}
4041EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054void
4055ring_buffer_read_start(struct ring_buffer_iter *iter)
4056{
4057 struct ring_buffer_per_cpu *cpu_buffer;
4058 unsigned long flags;
4059
4060 if (!iter)
4061 return;
4062
4063 cpu_buffer = iter->cpu_buffer;
4064
4065 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4066 arch_spin_lock(&cpu_buffer->lock);
4067 rb_iter_reset(iter);
4068 arch_spin_unlock(&cpu_buffer->lock);
4069 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4070}
4071EXPORT_SYMBOL_GPL(ring_buffer_read_start);
4072
4073
4074
4075
4076
4077
4078
4079
4080void
4081ring_buffer_read_finish(struct ring_buffer_iter *iter)
4082{
4083 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4084 unsigned long flags;
4085
4086
4087
4088
4089
4090
4091
4092 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4093 rb_check_pages(cpu_buffer);
4094 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4095
4096 atomic_dec(&cpu_buffer->record_disabled);
4097 atomic_dec(&cpu_buffer->buffer->resize_disabled);
4098 kfree(iter);
4099}
4100EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
4101
4102
4103
4104
4105
4106
4107
4108
4109struct ring_buffer_event *
4110ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
4111{
4112 struct ring_buffer_event *event;
4113 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4114 unsigned long flags;
4115
4116 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4117 again:
4118 event = rb_iter_peek(iter, ts);
4119 if (!event)
4120 goto out;
4121
4122 if (event->type_len == RINGBUF_TYPE_PADDING)
4123 goto again;
4124
4125 rb_advance_iter(iter);
4126 out:
4127 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4128
4129 return event;
4130}
4131EXPORT_SYMBOL_GPL(ring_buffer_read);
4132
4133
4134
4135
4136
4137unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
4138{
4139
4140
4141
4142
4143
4144
4145 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4146 return 0;
4147
4148 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
4149}
4150EXPORT_SYMBOL_GPL(ring_buffer_size);
4151
4152static void
4153rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
4154{
4155 rb_head_page_deactivate(cpu_buffer);
4156
4157 cpu_buffer->head_page
4158 = list_entry(cpu_buffer->pages, struct buffer_page, list);
4159 local_set(&cpu_buffer->head_page->write, 0);
4160 local_set(&cpu_buffer->head_page->entries, 0);
4161 local_set(&cpu_buffer->head_page->page->commit, 0);
4162
4163 cpu_buffer->head_page->read = 0;
4164
4165 cpu_buffer->tail_page = cpu_buffer->head_page;
4166 cpu_buffer->commit_page = cpu_buffer->head_page;
4167
4168 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
4169 INIT_LIST_HEAD(&cpu_buffer->new_pages);
4170 local_set(&cpu_buffer->reader_page->write, 0);
4171 local_set(&cpu_buffer->reader_page->entries, 0);
4172 local_set(&cpu_buffer->reader_page->page->commit, 0);
4173 cpu_buffer->reader_page->read = 0;
4174
4175 local_set(&cpu_buffer->entries_bytes, 0);
4176 local_set(&cpu_buffer->overrun, 0);
4177 local_set(&cpu_buffer->commit_overrun, 0);
4178 local_set(&cpu_buffer->dropped_events, 0);
4179 local_set(&cpu_buffer->entries, 0);
4180 local_set(&cpu_buffer->committing, 0);
4181 local_set(&cpu_buffer->commits, 0);
4182 cpu_buffer->read = 0;
4183 cpu_buffer->read_bytes = 0;
4184
4185 cpu_buffer->write_stamp = 0;
4186 cpu_buffer->read_stamp = 0;
4187
4188 cpu_buffer->lost_events = 0;
4189 cpu_buffer->last_overrun = 0;
4190
4191 rb_head_page_activate(cpu_buffer);
4192}
4193
4194
4195
4196
4197
4198
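/**
 * ring_buffer_reset_cpu - reset (empty) a CPU buffer
 * @buffer: the ring buffer
 * @cpu: the CPU buffer to reset
 *
 * Discards everything in the per-CPU buffer and clears its counters.
 * Recording and resizing are disabled for the duration of the reset.
 */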
4199void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
4200{
4201 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4202 unsigned long flags;
4203
4204 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4205 return;
4206
4207 atomic_inc(&buffer->resize_disabled);
4208 atomic_inc(&cpu_buffer->record_disabled);
4209
4210
4211 synchronize_sched();
4212
4213 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4214
4215 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
4216 goto out;
4217
4218 arch_spin_lock(&cpu_buffer->lock);
4219
4220 rb_reset_cpu(cpu_buffer);
4221
4222 arch_spin_unlock(&cpu_buffer->lock);
4223
4224 out:
4225 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4226
4227 atomic_dec(&cpu_buffer->record_disabled);
4228 atomic_dec(&buffer->resize_disabled);
4229}
4230EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
4231
4232
4233
4234
4235
4236void ring_buffer_reset(struct ring_buffer *buffer)
4237{
4238 int cpu;
4239
4240 for_each_buffer_cpu(buffer, cpu)
4241 ring_buffer_reset_cpu(buffer, cpu);
4242}
4243EXPORT_SYMBOL_GPL(ring_buffer_reset);
4244
4245
4246
4247
4248
4249bool ring_buffer_empty(struct ring_buffer *buffer)
4250{
4251 struct ring_buffer_per_cpu *cpu_buffer;
4252 unsigned long flags;
4253 bool dolock;
4254 int cpu;
4255 int ret;
4256
4257
4258 for_each_buffer_cpu(buffer, cpu) {
4259 cpu_buffer = buffer->buffers[cpu];
4260 local_irq_save(flags);
4261 dolock = rb_reader_lock(cpu_buffer);
4262 ret = rb_per_cpu_empty(cpu_buffer);
4263 rb_reader_unlock(cpu_buffer, dolock);
4264 local_irq_restore(flags);
4265
4266 if (!ret)
4267 return false;
4268 }
4269
4270 return true;
4271}
4272EXPORT_SYMBOL_GPL(ring_buffer_empty);
4273
4274
4275
4276
4277
4278
4279bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
4280{
4281 struct ring_buffer_per_cpu *cpu_buffer;
4282 unsigned long flags;
4283 bool dolock;
4284 int ret;
4285
4286 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4287 return true;
4288
4289 cpu_buffer = buffer->buffers[cpu];
4290 local_irq_save(flags);
4291 dolock = rb_reader_lock(cpu_buffer);
4292 ret = rb_per_cpu_empty(cpu_buffer);
4293 rb_reader_unlock(cpu_buffer, dolock);
4294 local_irq_restore(flags);
4295
4296 return ret;
4297}
4298EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
4299
4300#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
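/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: one ring buffer
 * @buffer_b: the other ring buffer
 * @cpu: the CPU buffer to swap
 *
 * Swaps the per-CPU buffer for @cpu between @buffer_a and @buffer_b.
 * Returns 0 on success, -EINVAL if @cpu is invalid or the page counts
 * differ, -EAGAIN if recording is disabled on either buffer, and
 * -EBUSY if a commit is in flight on either CPU buffer.
 */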
4311int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4312 struct ring_buffer *buffer_b, int cpu)
4313{
4314 struct ring_buffer_per_cpu *cpu_buffer_a;
4315 struct ring_buffer_per_cpu *cpu_buffer_b;
4316 int ret = -EINVAL;
4317
4318 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4319 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4320 goto out;
4321
4322 cpu_buffer_a = buffer_a->buffers[cpu];
4323 cpu_buffer_b = buffer_b->buffers[cpu];
4324
4325
4326 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4327 goto out;
4328
4329 ret = -EAGAIN;
4330
4331 if (atomic_read(&buffer_a->record_disabled))
4332 goto out;
4333
4334 if (atomic_read(&buffer_b->record_disabled))
4335 goto out;
4336
4337 if (atomic_read(&cpu_buffer_a->record_disabled))
4338 goto out;
4339
4340 if (atomic_read(&cpu_buffer_b->record_disabled))
4341 goto out;
4342
4343
4344
4345
4346
4347
4348
4349 atomic_inc(&cpu_buffer_a->record_disabled);
4350 atomic_inc(&cpu_buffer_b->record_disabled);
4351
4352 ret = -EBUSY;
4353 if (local_read(&cpu_buffer_a->committing))
4354 goto out_dec;
4355 if (local_read(&cpu_buffer_b->committing))
4356 goto out_dec;
4357
4358 buffer_a->buffers[cpu] = cpu_buffer_b;
4359 buffer_b->buffers[cpu] = cpu_buffer_a;
4360
4361 cpu_buffer_b->buffer = buffer_a;
4362 cpu_buffer_a->buffer = buffer_b;
4363
4364 ret = 0;
4365
4366out_dec:
4367 atomic_dec(&cpu_buffer_a->record_disabled);
4368 atomic_dec(&cpu_buffer_b->record_disabled);
4369out:
4370 return ret;
4371}
4372EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
4373#endif
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
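/**
 * ring_buffer_alloc_read_page - allocate a page to read into
 * @buffer: the buffer the page will be used with
 * @cpu: the CPU buffer the page will be passed to
 *
 * Returns a buffer_data_page for use with ring_buffer_read_page(),
 * reusing a cached free page when one is available. Returns
 * ERR_PTR(-ENODEV) for an invalid @cpu and ERR_PTR(-ENOMEM) if the
 * allocation fails. Release the page with ring_buffer_free_read_page().
 */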
4391void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
4392{
4393 struct ring_buffer_per_cpu *cpu_buffer;
4394 struct buffer_data_page *bpage = NULL;
4395 unsigned long flags;
4396 struct page *page;
4397
4398 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4399 return ERR_PTR(-ENODEV);
4400
4401 cpu_buffer = buffer->buffers[cpu];
4402 local_irq_save(flags);
4403 arch_spin_lock(&cpu_buffer->lock);
4404
4405 if (cpu_buffer->free_page) {
4406 bpage = cpu_buffer->free_page;
4407 cpu_buffer->free_page = NULL;
4408 }
4409
4410 arch_spin_unlock(&cpu_buffer->lock);
4411 local_irq_restore(flags);
4412
4413 if (bpage)
4414 goto out;
4415
4416 page = alloc_pages_node(cpu_to_node(cpu),
4417 GFP_KERNEL | __GFP_NORETRY, 0);
4418 if (!page)
4419 return ERR_PTR(-ENOMEM);
4420
4421 bpage = page_address(page);
4422
4423 out:
4424 rb_init_page(bpage);
4425
4426 return bpage;
4427}
4428EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4439{
4440 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4441 struct buffer_data_page *bpage = data;
4442 unsigned long flags;
4443
4444 local_irq_save(flags);
4445 arch_spin_lock(&cpu_buffer->lock);
4446
4447 if (!cpu_buffer->free_page) {
4448 cpu_buffer->free_page = bpage;
4449 bpage = NULL;
4450 }
4451
4452 arch_spin_unlock(&cpu_buffer->lock);
4453 local_irq_restore(flags);
4454
4455 free_page((unsigned long)bpage);
4456}
4457EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
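/**
 * ring_buffer_read_page - extract a page of data from the ring buffer
 * @buffer: the buffer to extract from
 * @data_page: address of the page pointer returned by
 *	ring_buffer_alloc_read_page() (the page may be swapped out)
 * @len: the amount of data to read
 * @cpu: the CPU buffer to extract from
 * @full: if non-zero, only succeed when a full page can be taken
 *
 * Pulls a page of data out of the ring buffer and consumes it, either
 * by copying events into @data_page or by swapping @data_page with the
 * reader page. Returns the offset of the data within the page (>= 0)
 * when data was transferred, or a negative value if nothing was
 * transferred.
 *
 * For example (sketch only; process_page() is a made-up consumer):
 *
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (IS_ERR(rpage))
 *		return PTR_ERR(rpage);
 *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(rpage, ret);
 *	ring_buffer_free_read_page(buffer, cpu, rpage);
 */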
4492int ring_buffer_read_page(struct ring_buffer *buffer,
4493 void **data_page, size_t len, int cpu, int full)
4494{
4495 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4496 struct ring_buffer_event *event;
4497 struct buffer_data_page *bpage;
4498 struct buffer_page *reader;
4499 unsigned long missed_events;
4500 unsigned long flags;
4501 unsigned int commit;
4502 unsigned int read;
4503 u64 save_timestamp;
4504 int ret = -1;
4505
4506 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4507 goto out;
4508
4509
4510
4511
4512
4513 if (len <= BUF_PAGE_HDR_SIZE)
4514 goto out;
4515
4516 len -= BUF_PAGE_HDR_SIZE;
4517
4518 if (!data_page)
4519 goto out;
4520
4521 bpage = *data_page;
4522 if (!bpage)
4523 goto out;
4524
4525 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4526
4527 reader = rb_get_reader_page(cpu_buffer);
4528 if (!reader)
4529 goto out_unlock;
4530
4531 event = rb_reader_event(cpu_buffer);
4532
4533 read = reader->read;
4534 commit = rb_page_commit(reader);
4535
4536
4537 missed_events = cpu_buffer->lost_events;
4538
4539
4540
4541
4542
4543
4544
4545
4546 if (read || (len < (commit - read)) ||
4547 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4548 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4549 unsigned int rpos = read;
4550 unsigned int pos = 0;
4551 unsigned int size;
4552
4553 if (full)
4554 goto out_unlock;
4555
4556 if (len > (commit - read))
4557 len = (commit - read);
4558
4559
4560 size = rb_event_ts_length(event);
4561
4562 if (len < size)
4563 goto out_unlock;
4564
4565
4566 save_timestamp = cpu_buffer->read_stamp;
4567
4568
4569 do {
4570
4571
4572
4573
4574
4575
4576 size = rb_event_length(event);
4577 memcpy(bpage->data + pos, rpage->data + rpos, size);
4578
4579 len -= size;
4580
4581 rb_advance_reader(cpu_buffer);
4582 rpos = reader->read;
4583 pos += size;
4584
4585 if (rpos >= commit)
4586 break;
4587
4588 event = rb_reader_event(cpu_buffer);
4589
4590 size = rb_event_ts_length(event);
4591 } while (len >= size);
4592
4593
4594 local_set(&bpage->commit, pos);
4595 bpage->time_stamp = save_timestamp;
4596
4597
4598 read = 0;
4599 } else {
4600
4601 cpu_buffer->read += rb_page_entries(reader);
4602 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4603
4604
4605 rb_init_page(bpage);
4606 bpage = reader->page;
4607 reader->page = *data_page;
4608 local_set(&reader->write, 0);
4609 local_set(&reader->entries, 0);
4610 reader->read = 0;
4611 *data_page = bpage;
4612
4613
4614
4615
4616
4617
4618 if (reader->real_end)
4619 local_set(&bpage->commit, reader->real_end);
4620 }
4621 ret = read;
4622
4623 cpu_buffer->lost_events = 0;
4624
4625 commit = local_read(&bpage->commit);
4626
4627
4628
4629 if (missed_events) {
4630
4631
4632
4633 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4634 memcpy(&bpage->data[commit], &missed_events,
4635 sizeof(missed_events));
4636 local_add(RB_MISSED_STORED, &bpage->commit);
4637 commit += sizeof(missed_events);
4638 }
4639 local_add(RB_MISSED_EVENTS, &bpage->commit);
4640 }
4641
4642
4643
4644
4645 if (commit < BUF_PAGE_SIZE)
4646 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4647
4648 out_unlock:
4649 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4650
4651 out:
4652 return ret;
4653}
4654EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4655
4656
4657
4658
4659
4660
4661int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
4662{
4663 struct ring_buffer *buffer;
4664 long nr_pages_same;
4665 int cpu_i;
4666 unsigned long nr_pages;
4667
4668 buffer = container_of(node, struct ring_buffer, node);
4669 if (cpumask_test_cpu(cpu, buffer->cpumask))
4670 return 0;
4671
4672 nr_pages = 0;
4673 nr_pages_same = 1;
4674
4675 for_each_buffer_cpu(buffer, cpu_i) {
4676
4677 if (nr_pages == 0)
4678 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4679 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4680 nr_pages_same = 0;
4681 break;
4682 }
4683 }
4684
4685 if (!nr_pages_same)
4686 nr_pages = 2;
4687 buffer->buffers[cpu] =
4688 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4689 if (!buffer->buffers[cpu]) {
4690 WARN(1, "failed to allocate ring buffer on CPU %u\n",
4691 cpu);
4692 return -ENOMEM;
4693 }
4694 smp_wmb();
4695 cpumask_set_cpu(cpu, buffer->cpumask);
4696 return 0;
4697}
4698
4699#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715static struct task_struct *rb_threads[NR_CPUS] __initdata;
4716
4717struct rb_test_data {
4718 struct ring_buffer *buffer;
4719 unsigned long events;
4720 unsigned long bytes_written;
4721 unsigned long bytes_alloc;
4722 unsigned long bytes_dropped;
4723 unsigned long events_nested;
4724 unsigned long bytes_written_nested;
4725 unsigned long bytes_alloc_nested;
4726 unsigned long bytes_dropped_nested;
4727 int min_size_nested;
4728 int max_size_nested;
4729 int max_size;
4730 int min_size;
4731 int cpu;
4732 int cnt;
4733};
4734
4735static struct rb_test_data rb_data[NR_CPUS] __initdata;
4736
4737
4738#define RB_TEST_BUFFER_SIZE 1048576
4739
4740static char rb_string[] __initdata =
4741 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
4742 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
4743 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
4744
4745static bool rb_test_started __initdata;
4746
4747struct rb_item {
4748 int size;
4749 char str[];
4750};
4751
4752static __init int rb_write_something(struct rb_test_data *data, bool nested)
4753{
4754 struct ring_buffer_event *event;
4755 struct rb_item *item;
4756 bool started;
4757 int event_len;
4758 int size;
4759 int len;
4760 int cnt;
4761
4762
4763 cnt = data->cnt + (nested ? 27 : 0);
4764
4765
4766 size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
4767
4768 len = size + sizeof(struct rb_item);
4769
4770 started = rb_test_started;
4771
4772 smp_rmb();
4773
4774 event = ring_buffer_lock_reserve(data->buffer, len);
4775 if (!event) {
4776
4777 if (started) {
4778 if (nested)
4779 data->bytes_dropped_nested += len;
4780 else
4781 data->bytes_dropped += len;
4782 }
4783 return len;
4784 }
4785
4786 event_len = ring_buffer_event_length(event);
4787
4788 if (RB_WARN_ON(data->buffer, event_len < len))
4789 goto out;
4790
4791 item = ring_buffer_event_data(event);
4792 item->size = size;
4793 memcpy(item->str, rb_string, size);
4794
4795 if (nested) {
4796 data->bytes_alloc_nested += event_len;
4797 data->bytes_written_nested += len;
4798 data->events_nested++;
4799 if (!data->min_size_nested || len < data->min_size_nested)
4800 data->min_size_nested = len;
4801 if (len > data->max_size_nested)
4802 data->max_size_nested = len;
4803 } else {
4804 data->bytes_alloc += event_len;
4805 data->bytes_written += len;
4806 data->events++;
4807 if (!data->min_size || len < data->min_size)
4808 data->min_size = len;
4809 if (len > data->max_size)
4810 data->max_size = len;
4811 }
4812
4813 out:
4814 ring_buffer_unlock_commit(data->buffer, event);
4815
4816 return 0;
4817}
4818
4819static __init int rb_test(void *arg)
4820{
4821 struct rb_test_data *data = arg;
4822
4823 while (!kthread_should_stop()) {
4824 rb_write_something(data, false);
4825 data->cnt++;
4826
4827 set_current_state(TASK_INTERRUPTIBLE);
4828
4829 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
4830 }
4831
4832 return 0;
4833}
4834
4835static __init void rb_ipi(void *ignore)
4836{
4837 struct rb_test_data *data;
4838 int cpu = smp_processor_id();
4839
4840 data = &rb_data[cpu];
4841 rb_write_something(data, true);
4842}
4843
4844static __init int rb_hammer_test(void *arg)
4845{
4846 while (!kthread_should_stop()) {
4847
4848
4849 smp_call_function(rb_ipi, NULL, 1);
4850
4851 schedule();
4852 }
4853
4854 return 0;
4855}
4856
4857static __init int test_ringbuffer(void)
4858{
4859 struct task_struct *rb_hammer;
4860 struct ring_buffer *buffer;
4861 int cpu;
4862 int ret = 0;
4863
4864 pr_info("Running ring buffer tests...\n");
4865
4866 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
4867 if (WARN_ON(!buffer))
4868 return 0;
4869
4870
4871 ring_buffer_record_off(buffer);
4872
4873 for_each_online_cpu(cpu) {
4874 rb_data[cpu].buffer = buffer;
4875 rb_data[cpu].cpu = cpu;
4876 rb_data[cpu].cnt = cpu;
4877 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
4878 "rbtester/%d", cpu);
4879 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
4880 pr_cont("FAILED\n");
4881 ret = PTR_ERR(rb_threads[cpu]);
4882 goto out_free;
4883 }
4884
4885 kthread_bind(rb_threads[cpu], cpu);
4886 wake_up_process(rb_threads[cpu]);
4887 }
4888
4889
4890 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
4891 if (WARN_ON(IS_ERR(rb_hammer))) {
4892 pr_cont("FAILED\n");
4893 ret = PTR_ERR(rb_hammer);
4894 goto out_free;
4895 }
4896
4897 ring_buffer_record_on(buffer);
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907 smp_wmb();
4908 rb_test_started = true;
4909
4910 set_current_state(TASK_INTERRUPTIBLE);
4911 /* Just run for 10 seconds */
4912 schedule_timeout(10 * HZ);
4913
4914 kthread_stop(rb_hammer);
4915
4916 out_free:
4917 for_each_online_cpu(cpu) {
4918 if (!rb_threads[cpu])
4919 break;
4920 kthread_stop(rb_threads[cpu]);
4921 }
4922 if (ret) {
4923 ring_buffer_free(buffer);
4924 return ret;
4925 }
4926
4927
4928 pr_info("finished\n");
4929 for_each_online_cpu(cpu) {
4930 struct ring_buffer_event *event;
4931 struct rb_test_data *data = &rb_data[cpu];
4932 struct rb_item *item;
4933 unsigned long total_events;
4934 unsigned long total_dropped;
4935 unsigned long total_written;
4936 unsigned long total_alloc;
4937 unsigned long total_read = 0;
4938 unsigned long total_size = 0;
4939 unsigned long total_len = 0;
4940 unsigned long total_lost = 0;
4941 unsigned long lost;
4942 int big_event_size;
4943 int small_event_size;
4944
4945 ret = -1;
4946
4947 total_events = data->events + data->events_nested;
4948 total_written = data->bytes_written + data->bytes_written_nested;
4949 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
4950 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
4951
4952 big_event_size = data->max_size + data->max_size_nested;
4953 small_event_size = data->min_size + data->min_size_nested;
4954
4955 pr_info("CPU %d:\n", cpu);
4956 pr_info(" events: %ld\n", total_events);
4957 pr_info(" dropped bytes: %ld\n", total_dropped);
4958 pr_info(" alloced bytes: %ld\n", total_alloc);
4959 pr_info(" written bytes: %ld\n", total_written);
4960 pr_info(" biggest event: %d\n", big_event_size);
4961 pr_info(" smallest event: %d\n", small_event_size);
4962
4963 if (RB_WARN_ON(buffer, total_dropped))
4964 break;
4965
4966 ret = 0;
4967
4968 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
4969 total_lost += lost;
4970 item = ring_buffer_event_data(event);
4971 total_len += ring_buffer_event_length(event);
4972 total_size += item->size + sizeof(struct rb_item);
4973 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
4974 pr_info("FAILED!\n");
4975 pr_info("buffer had: %.*s\n", item->size, item->str);
4976 pr_info("expected: %.*s\n", item->size, rb_string);
4977 RB_WARN_ON(buffer, 1);
4978 ret = -1;
4979 break;
4980 }
4981 total_read++;
4982 }
4983 if (ret)
4984 break;
4985
4986 ret = -1;
4987
4988 pr_info(" read events: %ld\n", total_read);
4989 pr_info(" lost events: %ld\n", total_lost);
4990 pr_info(" total events: %ld\n", total_lost + total_read);
4991 pr_info(" recorded len bytes: %ld\n", total_len);
4992 pr_info(" recorded size bytes: %ld\n", total_size);
4993 if (total_lost)
4994 pr_info(" With dropped events, record len and size may not match\n"
4995 " alloced and written from above\n");
4996 if (!total_lost) {
4997 if (RB_WARN_ON(buffer, total_len != total_alloc ||
4998 total_size != total_written))
4999 break;
5000 }
5001 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
5002 break;
5003
5004 ret = 0;
5005 }
5006 if (!ret)
5007 pr_info("Ring buffer PASSED!\n");
5008
5009 ring_buffer_free(buffer);
5010 return 0;
5011}
5012
5013late_initcall(test_ringbuffer);
5014#endif
5015