#include <linux/ftrace_event.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);
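
/*
 * Describe the compressed ring buffer event header (type_len,
 * time_delta, array) so that user space tools can decode the
 * binary event stream.
 */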
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len : 5 bits\n");
	trace_seq_puts(s, "\ttime_delta : 27 bits\n");
	trace_seq_puts(s, "\tarray : 32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\tdata max type_len == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}

enum {
	RB_BUFFERS_ON_BIT	= 0,
	RB_BUFFERS_DISABLED_BIT	= 1,
};

enum {
	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
};

static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;

/* Used for individual buffers (after the global counter bits) */
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
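
/**
 * tracing_off_permanent - permanently disable ring buffers
 *
 * This function, once called, will disable all ring buffers
 * permanently.
 */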
void tracing_off_permanent(void)
{
	set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
}

#define RB_EVNT_HDR_SIZE	(offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	/* padding has a NULL time_delta */
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}
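
/*
 * Return the length of the given event.  Will return
 * the length of the time extend if the event is a
 * time extend.
 */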
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			/* undefined */
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		BUG();
	}
	/* not hit */
	return 0;
}

/*
 * Return total length of the time extend and the data event that
 * follows it, or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
		/* time extends include the data event after it */
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}
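
/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */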
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

static void *
rb_event_data(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in type_len, then array[0] has the data */
	if (event->type_len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}
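
/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */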
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at commit */
#define RB_MISSED_STORED	(1 << 30)

struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* write committed index */
	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
};
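
/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */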
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};
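
/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counters are 20 bits in size.
 */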
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}

/**
 * ring_buffer_page_len - the size of data on the page.
 * @page: The page to read
 *
 * Returns the amount of data on the page, including the buffer page header.
 */
size_t ring_buffer_page_len(void *page)
{
	return local_read(&((struct buffer_data_page *)page)->commit)
		+ BUF_PAGE_HDR_SIZE;
}

static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;

	trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			 "offset:0;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)sizeof(field.time_stamp),
			 (unsigned int)is_signed_type(u64));

	trace_seq_printf(s, "\tfield: local_t commit;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 (unsigned int)sizeof(field.commit),
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: int overwrite;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 1,
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: char data;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), data),
			 (unsigned int)BUF_PAGE_SIZE,
			 (unsigned int)is_signed_type(char));

	return !trace_seq_has_overflowed(s);
}

struct rb_irq_work {
	struct irq_work			work;
	wait_queue_head_t		waiters;
	wait_queue_head_t		full_waiters;
	bool				waiters_pending;
	bool				full_waiters_pending;
	bool				wakeup_full;
};
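
/*
 * Per-CPU ring buffer state: the list of data pages plus the head
 * (reader), tail (writer), commit and reader page pointers, and the
 * counters that track entries, overruns and bytes.
 */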
struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	struct ring_buffer		*buffer;
	raw_spinlock_t			reader_lock;	/* serialize readers */
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	unsigned int			nr_pages;
	struct list_head		*pages;
	struct buffer_page		*head_page;	/* read from head */
	struct buffer_page		*tail_page;	/* write to tail */
	struct buffer_page		*commit_page;	/* committed pages */
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	local_t				entries_bytes;
	local_t				entries;
	local_t				overrun;
	local_t				commit_overrun;
	local_t				dropped_events;
	local_t				committing;
	local_t				commits;
	unsigned long			read;
	unsigned long			read_bytes;
	u64				write_stamp;
	u64				read_stamp;
	/* ring buffer pages to update, > 0 to add, < 0 to remove */
	int				nr_pages_to_update;
	struct list_head		new_pages;	/* new pages to add */
	struct work_struct		update_pages_work;
	struct completion		update_done;

	struct rb_irq_work		irq_work;
};

struct ring_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

#ifdef CONFIG_HOTPLUG_CPU
	struct notifier_block		cpu_notify;
#endif
	u64				(*clock)(void);

	struct rb_irq_work		irq_work;
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
};
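
/*
 * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
 *
 * Called from irq_work context to wake up any task that is blocked
 * on the ring buffer waiters queues.
 */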
static void rb_wake_up_waiters(struct irq_work *work)
{
	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);

	wake_up_all(&rbwork->waiters);
	if (rbwork->wakeup_full) {
		rbwork->wakeup_full = false;
		wake_up_all(&rbwork->full_waiters);
	}
}
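
/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * A reader would typically use this as a blocking front end to the
 * consuming read, for example (sketch only):
 *
 *	if (ring_buffer_wait(buffer, cpu, false) == 0)
 *		event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 */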
int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
{
	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
	DEFINE_WAIT(wait);
	struct rb_irq_work *work;
	int ret = 0;

	/*
	 * Depending on what the caller is waiting for, either any
	 * data in any cpu buffer, or a specific buffer, put the
	 * caller on the appropriate wait queue.
	 */
	if (cpu == RING_BUFFER_ALL_CPUS) {
		work = &buffer->irq_work;
		/* Full only makes sense on per cpu reads */
		full = false;
	} else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -ENODEV;
		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	while (true) {
		if (full)
			prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
		else
			prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);

		/*
		 * The events can happen in critical sections where
		 * checking a work queue can cause deadlocks.
		 * After adding a task to the queue, this flag is set
		 * only to notify events to try to wake up the queue
		 * using irq_work.
		 *
		 * We don't clear it even if the buffer is no longer
		 * empty. The flag only causes the next event to run
		 * irq_work to do the work queue wake up. The worst
		 * that can happen if we race with !trace_empty() is that
		 * an event will cause an irq_work to try to wake up
		 * an empty queue.
		 *
		 * There's no reason to protect this flag either, as
		 * the work queue and irq_work logic will do the necessary
		 * synchronization for the wake ups. The only thing
		 * that is necessary is that the wake up happens after
		 * a task has been queued. It's OK for spurious wake ups.
		 */
		if (full)
			work->full_waiters_pending = true;
		else
			work->waiters_pending = true;

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
			break;

		if (cpu != RING_BUFFER_ALL_CPUS &&
		    !ring_buffer_empty_cpu(buffer, cpu)) {
			unsigned long flags;
			bool pagebusy;

			if (!full)
				break;

			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);

			if (!pagebusy)
				break;
		}

		schedule();
	}

	if (full)
		finish_wait(&work->full_waiters, &wait);
	else
		finish_wait(&work->waiters, &wait);

	return ret;
}
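
/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns POLLIN | POLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */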
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
			  struct file *filp, poll_table *poll_table)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct rb_irq_work *work;

	if (cpu == RING_BUFFER_ALL_CPUS)
		work = &buffer->irq_work;
	else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -EINVAL;

		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	poll_wait(filp, &work->waiters, poll_table);
	work->waiters_pending = true;
	/*
	 * There's a tight race between setting the waiters_pending and
	 * checking if the ring buffer is empty.  Once the waiters_pending bit
	 * is set, the next event will wake the task up, but we can get stuck
	 * if there's only a single event in.
	 *
	 * FIXME: Ideally, we need a memory barrier on the writer side as well,
	 * but adding a memory barrier to all events will cause too much of a
	 * performance hit in the fast path.  We only need a memory barrier when
	 * the buffer goes from empty to having content.  But as this race is
	 * extremely small, and it's not a problem if another event comes in, we
	 * will fix it later.
	 */
	smp_mb();

	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
		return POLLIN | POLLRDNORM;
	return 0;
}

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})
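
/* Up this if you want to test the TIME_EXTENTS and normalization */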
#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct ring_buffer *buffer)
{
	/* shift to debug/test normalization and TIME_EXTENTS */
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_no_resched_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
				      int cpu, u64 *ts)
{
	/* Just test the normalize function and deltas */
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
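
/*
 * The writer side of the ring buffer is lockless.  Instead of taking a
 * lock to move the head page, the two least significant bits of the
 * list->next pointer of the page *before* the head page are used as
 * flags: HEAD marks the next page as the head page, and UPDATE marks
 * it as being moved by a writer that is about to overwrite it.
 * Readers and writers race on these bits with cmpxchg(); MOVED is
 * returned when the page a caller was working on is no longer in the
 * expected position.  The helpers below set, clear and test these
 * flag bits.
 */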
#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL

#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED		4UL

/*
 * rb_list_head - remove any bit
 */
static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}
825
826
827
828
829
830
831
832
833
834static inline int
835rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
836 struct buffer_page *page, struct list_head *list)
837{
838 unsigned long val;
839
840 val = (unsigned long)list->next;
841
842 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
843 return RB_PAGE_MOVED;
844
845 return val & RB_FLAG_MASK;
846}
847
848
849
850
851
852
853
854
855static int rb_is_reader_page(struct buffer_page *page)
856{
857 struct list_head *list = page->list.prev;
858
859 return rb_list_head(list->next) != &page->list;
860}
861
862
863
864
865static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
866 struct list_head *list)
867{
868 unsigned long *ptr;
869
870 ptr = (unsigned long *)&list->next;
871 *ptr |= RB_PAGE_HEAD;
872 *ptr &= ~RB_PAGE_UPDATE;
873}
874
875
876
877
878static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
879{
880 struct buffer_page *head;
881
882 head = cpu_buffer->head_page;
883 if (!head)
884 return;
885
886
887
888
889 rb_set_list_to_head(cpu_buffer, head->list.prev);
890}
891
892static void rb_list_head_clear(struct list_head *list)
893{
894 unsigned long *ptr = (unsigned long *)&list->next;
895
896 *ptr &= ~RB_FLAG_MASK;
897}
898
899
900
901
902static void
903rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
904{
905 struct list_head *hd;
906
907
908 rb_list_head_clear(cpu_buffer->pages);
909
910 list_for_each(hd, cpu_buffer->pages)
911 rb_list_head_clear(hd);
912}
913
914static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
915 struct buffer_page *head,
916 struct buffer_page *prev,
917 int old_flag, int new_flag)
918{
919 struct list_head *list;
920 unsigned long val = (unsigned long)&head->list;
921 unsigned long ret;
922
923 list = &prev->list;
924
925 val &= ~RB_FLAG_MASK;
926
927 ret = cmpxchg((unsigned long *)&list->next,
928 val | old_flag, val | new_flag);
929
930
931 if ((ret & ~RB_FLAG_MASK) != val)
932 return RB_PAGE_MOVED;
933
934 return ret & RB_FLAG_MASK;
935}
936
937static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
938 struct buffer_page *head,
939 struct buffer_page *prev,
940 int old_flag)
941{
942 return rb_head_page_set(cpu_buffer, head, prev,
943 old_flag, RB_PAGE_UPDATE);
944}
945
946static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
947 struct buffer_page *head,
948 struct buffer_page *prev,
949 int old_flag)
950{
951 return rb_head_page_set(cpu_buffer, head, prev,
952 old_flag, RB_PAGE_HEAD);
953}
954
955static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
956 struct buffer_page *head,
957 struct buffer_page *prev,
958 int old_flag)
959{
960 return rb_head_page_set(cpu_buffer, head, prev,
961 old_flag, RB_PAGE_NORMAL);
962}
963
964static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
965 struct buffer_page **bpage)
966{
967 struct list_head *p = rb_list_head((*bpage)->list.next);
968
969 *bpage = list_entry(p, struct buffer_page, list);
970}
971
972static struct buffer_page *
973rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
974{
975 struct buffer_page *head;
976 struct buffer_page *page;
977 struct list_head *list;
978 int i;
979
980 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
981 return NULL;
982
983
984 list = cpu_buffer->pages;
985 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
986 return NULL;
987
988 page = head = cpu_buffer->head_page;
989
990
991
992
993
994
995 for (i = 0; i < 3; i++) {
996 do {
997 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
998 cpu_buffer->head_page = page;
999 return page;
1000 }
1001 rb_inc_page(cpu_buffer, &page);
1002 } while (page != head);
1003 }
1004
1005 RB_WARN_ON(cpu_buffer, 1);
1006
1007 return NULL;
1008}
1009
1010static int rb_head_page_replace(struct buffer_page *old,
1011 struct buffer_page *new)
1012{
1013 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
1014 unsigned long val;
1015 unsigned long ret;
1016
1017 val = *ptr & ~RB_FLAG_MASK;
1018 val |= RB_PAGE_HEAD;
1019
1020 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
1021
1022 return ret == val;
1023}
1024
1025
1026
1027
1028
1029
1030static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
1031 struct buffer_page *tail_page,
1032 struct buffer_page *next_page)
1033{
1034 struct buffer_page *old_tail;
1035 unsigned long old_entries;
1036 unsigned long old_write;
1037 int ret = 0;
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
1049 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
1050
1051
1052
1053
1054
1055 barrier();
1056
1057
1058
1059
1060
1061
1062 if (tail_page == cpu_buffer->tail_page) {
1063
1064 unsigned long val = old_write & ~RB_WRITE_MASK;
1065 unsigned long eval = old_entries & ~RB_WRITE_MASK;
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077 (void)local_cmpxchg(&next_page->write, old_write, val);
1078 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
1079
1080
1081
1082
1083
1084
1085 local_set(&next_page->page->commit, 0);
1086
1087 old_tail = cmpxchg(&cpu_buffer->tail_page,
1088 tail_page, next_page);
1089
1090 if (old_tail == tail_page)
1091 ret = 1;
1092 }
1093
1094 return ret;
1095}
1096
1097static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
1098 struct buffer_page *bpage)
1099{
1100 unsigned long val = (unsigned long)bpage;
1101
1102 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
1103 return 1;
1104
1105 return 0;
1106}
1107
1108
1109
1110
1111static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1112 struct list_head *list)
1113{
1114 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
1115 return 1;
1116 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
1117 return 1;
1118 return 0;
1119}
1120
1121
1122
1123
1124
1125
1126
1127
1128static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
1129{
1130 struct list_head *head = cpu_buffer->pages;
1131 struct buffer_page *bpage, *tmp;
1132
1133
1134 if (cpu_buffer->head_page)
1135 rb_set_head_page(cpu_buffer);
1136
1137 rb_head_page_deactivate(cpu_buffer);
1138
1139 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
1140 return -1;
1141 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
1142 return -1;
1143
1144 if (rb_check_list(cpu_buffer, head))
1145 return -1;
1146
1147 list_for_each_entry_safe(bpage, tmp, head, list) {
1148 if (RB_WARN_ON(cpu_buffer,
1149 bpage->list.next->prev != &bpage->list))
1150 return -1;
1151 if (RB_WARN_ON(cpu_buffer,
1152 bpage->list.prev->next != &bpage->list))
1153 return -1;
1154 if (rb_check_list(cpu_buffer, &bpage->list))
1155 return -1;
1156 }
1157
1158 rb_head_page_activate(cpu_buffer);
1159
1160 return 0;
1161}
1162
1163static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
1164{
1165 int i;
1166 struct buffer_page *bpage, *tmp;
1167
1168 for (i = 0; i < nr_pages; i++) {
1169 struct page *page;
1170
1171
1172
1173
1174
1175 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1176 GFP_KERNEL | __GFP_NORETRY,
1177 cpu_to_node(cpu));
1178 if (!bpage)
1179 goto free_pages;
1180
1181 list_add(&bpage->list, pages);
1182
1183 page = alloc_pages_node(cpu_to_node(cpu),
1184 GFP_KERNEL | __GFP_NORETRY, 0);
1185 if (!page)
1186 goto free_pages;
1187 bpage->page = page_address(page);
1188 rb_init_page(bpage->page);
1189 }
1190
1191 return 0;
1192
1193free_pages:
1194 list_for_each_entry_safe(bpage, tmp, pages, list) {
1195 list_del_init(&bpage->list);
1196 free_buffer_page(bpage);
1197 }
1198
1199 return -ENOMEM;
1200}
1201
1202static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1203 unsigned nr_pages)
1204{
1205 LIST_HEAD(pages);
1206
1207 WARN_ON(!nr_pages);
1208
1209 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1210 return -ENOMEM;
1211
1212
1213
1214
1215
1216
1217 cpu_buffer->pages = pages.next;
1218 list_del(&pages);
1219
1220 cpu_buffer->nr_pages = nr_pages;
1221
1222 rb_check_pages(cpu_buffer);
1223
1224 return 0;
1225}
1226
1227static struct ring_buffer_per_cpu *
1228rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1229{
1230 struct ring_buffer_per_cpu *cpu_buffer;
1231 struct buffer_page *bpage;
1232 struct page *page;
1233 int ret;
1234
1235 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1236 GFP_KERNEL, cpu_to_node(cpu));
1237 if (!cpu_buffer)
1238 return NULL;
1239
1240 cpu_buffer->cpu = cpu;
1241 cpu_buffer->buffer = buffer;
1242 raw_spin_lock_init(&cpu_buffer->reader_lock);
1243 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1244 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1245 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1246 init_completion(&cpu_buffer->update_done);
1247 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1248 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1249 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1250
1251 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1252 GFP_KERNEL, cpu_to_node(cpu));
1253 if (!bpage)
1254 goto fail_free_buffer;
1255
1256 rb_check_bpage(cpu_buffer, bpage);
1257
1258 cpu_buffer->reader_page = bpage;
1259 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1260 if (!page)
1261 goto fail_free_reader;
1262 bpage->page = page_address(page);
1263 rb_init_page(bpage->page);
1264
1265 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1266 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1267
1268 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1269 if (ret < 0)
1270 goto fail_free_reader;
1271
1272 cpu_buffer->head_page
1273 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1274 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1275
1276 rb_head_page_activate(cpu_buffer);
1277
1278 return cpu_buffer;
1279
1280 fail_free_reader:
1281 free_buffer_page(cpu_buffer->reader_page);
1282
1283 fail_free_buffer:
1284 kfree(cpu_buffer);
1285 return NULL;
1286}
1287
1288static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1289{
1290 struct list_head *head = cpu_buffer->pages;
1291 struct buffer_page *bpage, *tmp;
1292
1293 free_buffer_page(cpu_buffer->reader_page);
1294
1295 rb_head_page_deactivate(cpu_buffer);
1296
1297 if (head) {
1298 list_for_each_entry_safe(bpage, tmp, head, list) {
1299 list_del_init(&bpage->list);
1300 free_buffer_page(bpage);
1301 }
1302 bpage = list_entry(head, struct buffer_page, list);
1303 free_buffer_page(bpage);
1304 }
1305
1306 kfree(cpu_buffer);
1307}
1308
1309#ifdef CONFIG_HOTPLUG_CPU
1310static int rb_cpu_notify(struct notifier_block *self,
1311 unsigned long action, void *hcpu);
1312#endif
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1325 struct lock_class_key *key)
1326{
1327 struct ring_buffer *buffer;
1328 int bsize;
1329 int cpu, nr_pages;
1330
1331
1332 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1333 GFP_KERNEL);
1334 if (!buffer)
1335 return NULL;
1336
1337 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1338 goto fail_free_buffer;
1339
1340 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1341 buffer->flags = flags;
1342 buffer->clock = trace_clock_local;
1343 buffer->reader_lock_key = key;
1344
1345 init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1346 init_waitqueue_head(&buffer->irq_work.waiters);
1347
1348
1349 if (nr_pages < 2)
1350 nr_pages = 2;
1351
1352
1353
1354
1355
1356
1357#ifdef CONFIG_HOTPLUG_CPU
1358 cpu_notifier_register_begin();
1359 cpumask_copy(buffer->cpumask, cpu_online_mask);
1360#else
1361 cpumask_copy(buffer->cpumask, cpu_possible_mask);
1362#endif
1363 buffer->cpus = nr_cpu_ids;
1364
1365 bsize = sizeof(void *) * nr_cpu_ids;
1366 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1367 GFP_KERNEL);
1368 if (!buffer->buffers)
1369 goto fail_free_cpumask;
1370
1371 for_each_buffer_cpu(buffer, cpu) {
1372 buffer->buffers[cpu] =
1373 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1374 if (!buffer->buffers[cpu])
1375 goto fail_free_buffers;
1376 }
1377
1378#ifdef CONFIG_HOTPLUG_CPU
1379 buffer->cpu_notify.notifier_call = rb_cpu_notify;
1380 buffer->cpu_notify.priority = 0;
1381 __register_cpu_notifier(&buffer->cpu_notify);
1382 cpu_notifier_register_done();
1383#endif
1384
1385 mutex_init(&buffer->mutex);
1386
1387 return buffer;
1388
1389 fail_free_buffers:
1390 for_each_buffer_cpu(buffer, cpu) {
1391 if (buffer->buffers[cpu])
1392 rb_free_cpu_buffer(buffer->buffers[cpu]);
1393 }
1394 kfree(buffer->buffers);
1395
1396 fail_free_cpumask:
1397 free_cpumask_var(buffer->cpumask);
1398#ifdef CONFIG_HOTPLUG_CPU
1399 cpu_notifier_register_done();
1400#endif
1401
1402 fail_free_buffer:
1403 kfree(buffer);
1404 return NULL;
1405}
1406EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
1407
1408
1409
1410
1411
1412void
1413ring_buffer_free(struct ring_buffer *buffer)
1414{
1415 int cpu;
1416
1417#ifdef CONFIG_HOTPLUG_CPU
1418 cpu_notifier_register_begin();
1419 __unregister_cpu_notifier(&buffer->cpu_notify);
1420#endif
1421
1422 for_each_buffer_cpu(buffer, cpu)
1423 rb_free_cpu_buffer(buffer->buffers[cpu]);
1424
1425#ifdef CONFIG_HOTPLUG_CPU
1426 cpu_notifier_register_done();
1427#endif
1428
1429 kfree(buffer->buffers);
1430 free_cpumask_var(buffer->cpumask);
1431
1432 kfree(buffer);
1433}
1434EXPORT_SYMBOL_GPL(ring_buffer_free);
1435
1436void ring_buffer_set_clock(struct ring_buffer *buffer,
1437 u64 (*clock)(void))
1438{
1439 buffer->clock = clock;
1440}
1441
1442static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1443
1444static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1445{
1446 return local_read(&bpage->entries) & RB_WRITE_MASK;
1447}
1448
1449static inline unsigned long rb_page_write(struct buffer_page *bpage)
1450{
1451 return local_read(&bpage->write) & RB_WRITE_MASK;
1452}
1453
1454static int
1455rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1456{
1457 struct list_head *tail_page, *to_remove, *next_page;
1458 struct buffer_page *to_remove_page, *tmp_iter_page;
1459 struct buffer_page *last_page, *first_page;
1460 unsigned int nr_removed;
1461 unsigned long head_bit;
1462 int page_entries;
1463
1464 head_bit = 0;
1465
1466 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1467 atomic_inc(&cpu_buffer->record_disabled);
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477 tail_page = &cpu_buffer->tail_page->list;
1478
1479
1480
1481
1482
1483 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1484 tail_page = rb_list_head(tail_page->next);
1485 to_remove = tail_page;
1486
1487
1488 first_page = list_entry(rb_list_head(to_remove->next),
1489 struct buffer_page, list);
1490
1491 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1492 to_remove = rb_list_head(to_remove)->next;
1493 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1494 }
1495
1496 next_page = rb_list_head(to_remove)->next;
1497
1498
1499
1500
1501
1502
1503 tail_page->next = (struct list_head *)((unsigned long)next_page |
1504 head_bit);
1505 next_page = rb_list_head(next_page);
1506 next_page->prev = tail_page;
1507
1508
1509 cpu_buffer->pages = next_page;
1510
1511
1512 if (head_bit)
1513 cpu_buffer->head_page = list_entry(next_page,
1514 struct buffer_page, list);
1515
1516
1517
1518
1519
1520 cpu_buffer->read = 0;
1521
1522
1523 atomic_dec(&cpu_buffer->record_disabled);
1524 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1525
1526 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1527
1528
1529 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1530 list);
1531 tmp_iter_page = first_page;
1532
1533 do {
1534 to_remove_page = tmp_iter_page;
1535 rb_inc_page(cpu_buffer, &tmp_iter_page);
1536
1537
1538 page_entries = rb_page_entries(to_remove_page);
1539 if (page_entries) {
1540
1541
1542
1543
1544
1545
1546 local_add(page_entries, &cpu_buffer->overrun);
1547 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1548 }
1549
1550
1551
1552
1553
1554 free_buffer_page(to_remove_page);
1555 nr_removed--;
1556
1557 } while (to_remove_page != last_page);
1558
1559 RB_WARN_ON(cpu_buffer, nr_removed);
1560
1561 return nr_removed == 0;
1562}
1563
1564static int
1565rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1566{
1567 struct list_head *pages = &cpu_buffer->new_pages;
1568 int retries, success;
1569
1570 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585 retries = 10;
1586 success = 0;
1587 while (retries--) {
1588 struct list_head *head_page, *prev_page, *r;
1589 struct list_head *last_page, *first_page;
1590 struct list_head *head_page_with_bit;
1591
1592 head_page = &rb_set_head_page(cpu_buffer)->list;
1593 if (!head_page)
1594 break;
1595 prev_page = head_page->prev;
1596
1597 first_page = pages->next;
1598 last_page = pages->prev;
1599
1600 head_page_with_bit = (struct list_head *)
1601 ((unsigned long)head_page | RB_PAGE_HEAD);
1602
1603 last_page->next = head_page_with_bit;
1604 first_page->prev = prev_page;
1605
1606 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1607
1608 if (r == head_page_with_bit) {
1609
1610
1611
1612
1613
1614 head_page->prev = last_page;
1615 success = 1;
1616 break;
1617 }
1618 }
1619
1620 if (success)
1621 INIT_LIST_HEAD(pages);
1622
1623
1624
1625
1626 RB_WARN_ON(cpu_buffer, !success);
1627 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1628
1629
1630 if (!success) {
1631 struct buffer_page *bpage, *tmp;
1632 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1633 list) {
1634 list_del_init(&bpage->list);
1635 free_buffer_page(bpage);
1636 }
1637 }
1638 return success;
1639}
1640
1641static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1642{
1643 int success;
1644
1645 if (cpu_buffer->nr_pages_to_update > 0)
1646 success = rb_insert_pages(cpu_buffer);
1647 else
1648 success = rb_remove_pages(cpu_buffer,
1649 -cpu_buffer->nr_pages_to_update);
1650
1651 if (success)
1652 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1653}
1654
1655static void update_pages_handler(struct work_struct *work)
1656{
1657 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1658 struct ring_buffer_per_cpu, update_pages_work);
1659 rb_update_pages(cpu_buffer);
1660 complete(&cpu_buffer->update_done);
1661}
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1674 int cpu_id)
1675{
1676 struct ring_buffer_per_cpu *cpu_buffer;
1677 unsigned nr_pages;
1678 int cpu, err = 0;
1679
1680
1681
1682
1683 if (!buffer)
1684 return size;
1685
1686
1687 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1688 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1689 return size;
1690
1691 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1692 size *= BUF_PAGE_SIZE;
1693
1694
1695 if (size < BUF_PAGE_SIZE * 2)
1696 size = BUF_PAGE_SIZE * 2;
1697
1698 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1699
1700
1701
1702
1703
1704
1705 if (atomic_read(&buffer->resize_disabled))
1706 return -EBUSY;
1707
1708
1709 mutex_lock(&buffer->mutex);
1710
1711 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1712
1713 for_each_buffer_cpu(buffer, cpu) {
1714 cpu_buffer = buffer->buffers[cpu];
1715
1716 cpu_buffer->nr_pages_to_update = nr_pages -
1717 cpu_buffer->nr_pages;
1718
1719
1720
1721 if (cpu_buffer->nr_pages_to_update <= 0)
1722 continue;
1723
1724
1725
1726
1727 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1728 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1729 &cpu_buffer->new_pages, cpu)) {
1730
1731 err = -ENOMEM;
1732 goto out_err;
1733 }
1734 }
1735
1736 get_online_cpus();
1737
1738
1739
1740
1741
1742 for_each_buffer_cpu(buffer, cpu) {
1743 cpu_buffer = buffer->buffers[cpu];
1744 if (!cpu_buffer->nr_pages_to_update)
1745 continue;
1746
1747
1748 if (!cpu_online(cpu)) {
1749 rb_update_pages(cpu_buffer);
1750 cpu_buffer->nr_pages_to_update = 0;
1751 } else {
1752 schedule_work_on(cpu,
1753 &cpu_buffer->update_pages_work);
1754 }
1755 }
1756
1757
1758 for_each_buffer_cpu(buffer, cpu) {
1759 cpu_buffer = buffer->buffers[cpu];
1760 if (!cpu_buffer->nr_pages_to_update)
1761 continue;
1762
1763 if (cpu_online(cpu))
1764 wait_for_completion(&cpu_buffer->update_done);
1765 cpu_buffer->nr_pages_to_update = 0;
1766 }
1767
1768 put_online_cpus();
1769 } else {
1770
1771 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1772 goto out;
1773
1774 cpu_buffer = buffer->buffers[cpu_id];
1775
1776 if (nr_pages == cpu_buffer->nr_pages)
1777 goto out;
1778
1779 cpu_buffer->nr_pages_to_update = nr_pages -
1780 cpu_buffer->nr_pages;
1781
1782 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1783 if (cpu_buffer->nr_pages_to_update > 0 &&
1784 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1785 &cpu_buffer->new_pages, cpu_id)) {
1786 err = -ENOMEM;
1787 goto out_err;
1788 }
1789
1790 get_online_cpus();
1791
1792
1793 if (!cpu_online(cpu_id))
1794 rb_update_pages(cpu_buffer);
1795 else {
1796 schedule_work_on(cpu_id,
1797 &cpu_buffer->update_pages_work);
1798 wait_for_completion(&cpu_buffer->update_done);
1799 }
1800
1801 cpu_buffer->nr_pages_to_update = 0;
1802 put_online_cpus();
1803 }
1804
1805 out:
1806
1807
1808
1809
1810
1811
1812
1813 if (atomic_read(&buffer->record_disabled)) {
1814 atomic_inc(&buffer->record_disabled);
1815
1816
1817
1818
1819
1820
1821 synchronize_sched();
1822 for_each_buffer_cpu(buffer, cpu) {
1823 cpu_buffer = buffer->buffers[cpu];
1824 rb_check_pages(cpu_buffer);
1825 }
1826 atomic_dec(&buffer->record_disabled);
1827 }
1828
1829 mutex_unlock(&buffer->mutex);
1830 return size;
1831
1832 out_err:
1833 for_each_buffer_cpu(buffer, cpu) {
1834 struct buffer_page *bpage, *tmp;
1835
1836 cpu_buffer = buffer->buffers[cpu];
1837 cpu_buffer->nr_pages_to_update = 0;
1838
1839 if (list_empty(&cpu_buffer->new_pages))
1840 continue;
1841
1842 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1843 list) {
1844 list_del_init(&bpage->list);
1845 free_buffer_page(bpage);
1846 }
1847 }
1848 mutex_unlock(&buffer->mutex);
1849 return err;
1850}
1851EXPORT_SYMBOL_GPL(ring_buffer_resize);
1852
1853void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1854{
1855 mutex_lock(&buffer->mutex);
1856 if (val)
1857 buffer->flags |= RB_FL_OVERWRITE;
1858 else
1859 buffer->flags &= ~RB_FL_OVERWRITE;
1860 mutex_unlock(&buffer->mutex);
1861}
1862EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1863
1864static inline void *
1865__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1866{
1867 return bpage->data + index;
1868}
1869
1870static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1871{
1872 return bpage->page->data + index;
1873}
1874
1875static inline struct ring_buffer_event *
1876rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1877{
1878 return __rb_page_index(cpu_buffer->reader_page,
1879 cpu_buffer->reader_page->read);
1880}
1881
1882static inline struct ring_buffer_event *
1883rb_iter_head_event(struct ring_buffer_iter *iter)
1884{
1885 return __rb_page_index(iter->head_page, iter->head);
1886}
1887
1888static inline unsigned rb_page_commit(struct buffer_page *bpage)
1889{
1890 return local_read(&bpage->page->commit);
1891}
1892
1893
1894static inline unsigned rb_page_size(struct buffer_page *bpage)
1895{
1896 return rb_page_commit(bpage);
1897}
1898
1899static inline unsigned
1900rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1901{
1902 return rb_page_commit(cpu_buffer->commit_page);
1903}
1904
1905static inline unsigned
1906rb_event_index(struct ring_buffer_event *event)
1907{
1908 unsigned long addr = (unsigned long)event;
1909
1910 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1911}
1912
1913static inline int
1914rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1915 struct ring_buffer_event *event)
1916{
1917 unsigned long addr = (unsigned long)event;
1918 unsigned long index;
1919
1920 index = rb_event_index(event);
1921 addr &= PAGE_MASK;
1922
1923 return cpu_buffer->commit_page->page == (void *)addr &&
1924 rb_commit_index(cpu_buffer) == index;
1925}
1926
1927static void
1928rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1929{
1930 unsigned long max_count;
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940 again:
1941 max_count = cpu_buffer->nr_pages * 100;
1942
1943 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1944 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1945 return;
1946 if (RB_WARN_ON(cpu_buffer,
1947 rb_is_reader_page(cpu_buffer->tail_page)))
1948 return;
1949 local_set(&cpu_buffer->commit_page->page->commit,
1950 rb_page_write(cpu_buffer->commit_page));
1951 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1952 cpu_buffer->write_stamp =
1953 cpu_buffer->commit_page->page->time_stamp;
1954
1955 barrier();
1956 }
1957 while (rb_commit_index(cpu_buffer) !=
1958 rb_page_write(cpu_buffer->commit_page)) {
1959
1960 local_set(&cpu_buffer->commit_page->page->commit,
1961 rb_page_write(cpu_buffer->commit_page));
1962 RB_WARN_ON(cpu_buffer,
1963 local_read(&cpu_buffer->commit_page->page->commit) &
1964 ~RB_WRITE_MASK);
1965 barrier();
1966 }
1967
1968
1969 barrier();
1970
1971
1972
1973
1974
1975
1976 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
1977 goto again;
1978}
1979
1980static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1981{
1982 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
1983 cpu_buffer->reader_page->read = 0;
1984}
1985
1986static void rb_inc_iter(struct ring_buffer_iter *iter)
1987{
1988 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1989
1990
1991
1992
1993
1994
1995
1996 if (iter->head_page == cpu_buffer->reader_page)
1997 iter->head_page = rb_set_head_page(cpu_buffer);
1998 else
1999 rb_inc_page(cpu_buffer, &iter->head_page);
2000
2001 iter->read_stamp = iter->head_page->page->time_stamp;
2002 iter->head = 0;
2003}
2004
2005
2006static noinline struct ring_buffer_event *
2007rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
2008{
2009 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2010
2011
2012 if (rb_event_index(event)) {
2013 event->time_delta = delta & TS_MASK;
2014 event->array[0] = delta >> TS_SHIFT;
2015 } else {
2016
2017 event->time_delta = 0;
2018 event->array[0] = 0;
2019 }
2020
2021 return skip_time_extend(event);
2022}
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035static void
2036rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2037 struct ring_buffer_event *event, unsigned length,
2038 int add_timestamp, u64 delta)
2039{
2040
2041 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2042 delta = 0;
2043
2044
2045
2046
2047
2048 if (unlikely(add_timestamp)) {
2049 event = rb_add_time_stamp(event, delta);
2050 length -= RB_LEN_TIME_EXTEND;
2051 delta = 0;
2052 }
2053
2054 event->time_delta = delta;
2055 length -= RB_EVNT_HDR_SIZE;
2056 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2057 event->type_len = 0;
2058 event->array[0] = length;
2059 } else
2060 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2061}
2062
2063
2064
2065
2066
2067
2068
2069
2070static int
2071rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
2072 struct buffer_page *tail_page,
2073 struct buffer_page *next_page)
2074{
2075 struct buffer_page *new_head;
2076 int entries;
2077 int type;
2078 int ret;
2079
2080 entries = rb_page_entries(next_page);
2081
2082
2083
2084
2085
2086
2087 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
2088 RB_PAGE_HEAD);
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101 switch (type) {
2102 case RB_PAGE_HEAD:
2103
2104
2105
2106
2107
2108 local_add(entries, &cpu_buffer->overrun);
2109 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
2110
2111
2112
2113
2114
2115
2116
2117 break;
2118
2119 case RB_PAGE_UPDATE:
2120
2121
2122
2123
2124 break;
2125 case RB_PAGE_NORMAL:
2126
2127
2128
2129
2130
2131 return 1;
2132 case RB_PAGE_MOVED:
2133
2134
2135
2136
2137
2138 return 1;
2139 default:
2140 RB_WARN_ON(cpu_buffer, 1);
2141 return -1;
2142 }
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158 new_head = next_page;
2159 rb_inc_page(cpu_buffer, &new_head);
2160
2161 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
2162 RB_PAGE_NORMAL);
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172 switch (ret) {
2173 case RB_PAGE_HEAD:
2174 case RB_PAGE_NORMAL:
2175
2176 break;
2177 default:
2178 RB_WARN_ON(cpu_buffer, 1);
2179 return -1;
2180 }
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192 if (ret == RB_PAGE_NORMAL) {
2193
2194
2195
2196
2197 if (cpu_buffer->tail_page != tail_page &&
2198 cpu_buffer->tail_page != next_page)
2199 rb_head_page_set_normal(cpu_buffer, new_head,
2200 next_page,
2201 RB_PAGE_HEAD);
2202 }
2203
2204
2205
2206
2207
2208
2209 if (type == RB_PAGE_HEAD) {
2210 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2211 tail_page,
2212 RB_PAGE_UPDATE);
2213 if (RB_WARN_ON(cpu_buffer,
2214 ret != RB_PAGE_UPDATE))
2215 return -1;
2216 }
2217
2218 return 0;
2219}
2220
2221static unsigned rb_calculate_event_length(unsigned length)
2222{
2223 struct ring_buffer_event event;
2224
2225
2226 if (!length)
2227 length = 1;
2228
2229 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2230 length += sizeof(event.array[0]);
2231
2232 length += RB_EVNT_HDR_SIZE;
2233 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2234
2235 return length;
2236}
2237
2238static inline void
2239rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2240 struct buffer_page *tail_page,
2241 unsigned long tail, unsigned long length)
2242{
2243 struct ring_buffer_event *event;
2244
2245
2246
2247
2248
2249 if (tail >= BUF_PAGE_SIZE) {
2250
2251
2252
2253
2254
2255 if (tail == BUF_PAGE_SIZE)
2256 tail_page->real_end = 0;
2257
2258 local_sub(length, &tail_page->write);
2259 return;
2260 }
2261
2262 event = __rb_page_index(tail_page, tail);
2263 kmemcheck_annotate_bitfield(event, bitfield);
2264
2265
2266 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2267
2268
2269
2270
2271
2272
2273 tail_page->real_end = tail;
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2287
2288
2289
2290 rb_event_set_padding(event);
2291
2292
2293 local_sub(length, &tail_page->write);
2294 return;
2295 }
2296
2297
2298 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2299 event->type_len = RINGBUF_TYPE_PADDING;
2300
2301 event->time_delta = 1;
2302
2303
2304 length = (tail + length) - BUF_PAGE_SIZE;
2305 local_sub(length, &tail_page->write);
2306}
2307
2308
2309
2310
2311static noinline struct ring_buffer_event *
2312rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2313 unsigned long length, unsigned long tail,
2314 struct buffer_page *tail_page, u64 ts)
2315{
2316 struct buffer_page *commit_page = cpu_buffer->commit_page;
2317 struct ring_buffer *buffer = cpu_buffer->buffer;
2318 struct buffer_page *next_page;
2319 int ret;
2320
2321 next_page = tail_page;
2322
2323 rb_inc_page(cpu_buffer, &next_page);
2324
2325
2326
2327
2328
2329
2330 if (unlikely(next_page == commit_page)) {
2331 local_inc(&cpu_buffer->commit_overrun);
2332 goto out_reset;
2333 }
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2350
2351
2352
2353
2354
2355 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2356
2357
2358
2359
2360 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2361 local_inc(&cpu_buffer->dropped_events);
2362 goto out_reset;
2363 }
2364
2365 ret = rb_handle_head_page(cpu_buffer,
2366 tail_page,
2367 next_page);
2368 if (ret < 0)
2369 goto out_reset;
2370 if (ret)
2371 goto out_again;
2372 } else {
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383 if (unlikely((cpu_buffer->commit_page !=
2384 cpu_buffer->tail_page) &&
2385 (cpu_buffer->commit_page ==
2386 cpu_buffer->reader_page))) {
2387 local_inc(&cpu_buffer->commit_overrun);
2388 goto out_reset;
2389 }
2390 }
2391 }
2392
2393 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
2394 if (ret) {
2395
2396
2397
2398
2399 ts = rb_time_stamp(buffer);
2400 next_page->page->time_stamp = ts;
2401 }
2402
2403 out_again:
2404
2405 rb_reset_tail(cpu_buffer, tail_page, tail, length);
2406
2407
2408 return ERR_PTR(-EAGAIN);
2409
2410 out_reset:
2411
2412 rb_reset_tail(cpu_buffer, tail_page, tail, length);
2413
2414 return NULL;
2415}
2416
2417static struct ring_buffer_event *
2418__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2419 unsigned long length, u64 ts,
2420 u64 delta, int add_timestamp)
2421{
2422 struct buffer_page *tail_page;
2423 struct ring_buffer_event *event;
2424 unsigned long tail, write;
2425
2426
2427
2428
2429
2430
2431 if (unlikely(add_timestamp))
2432 length += RB_LEN_TIME_EXTEND;
2433
2434 tail_page = cpu_buffer->tail_page;
2435 write = local_add_return(length, &tail_page->write);
2436
2437
2438 write &= RB_WRITE_MASK;
2439 tail = write - length;
2440
2441
2442
2443
2444
2445 if (!tail)
2446 delta = 0;
2447
2448
2449 if (unlikely(write > BUF_PAGE_SIZE))
2450 return rb_move_tail(cpu_buffer, length, tail,
2451 tail_page, ts);
2452
2453
2454
2455 event = __rb_page_index(tail_page, tail);
2456 kmemcheck_annotate_bitfield(event, bitfield);
2457 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
2458
2459 local_inc(&tail_page->entries);
2460
2461
2462
2463
2464
2465 if (!tail)
2466 tail_page->page->time_stamp = ts;
2467
2468
2469 local_add(length, &cpu_buffer->entries_bytes);
2470
2471 return event;
2472}
2473
2474static inline int
2475rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2476 struct ring_buffer_event *event)
2477{
2478 unsigned long new_index, old_index;
2479 struct buffer_page *bpage;
2480 unsigned long index;
2481 unsigned long addr;
2482
2483 new_index = rb_event_index(event);
2484 old_index = new_index + rb_event_ts_length(event);
2485 addr = (unsigned long)event;
2486 addr &= PAGE_MASK;
2487
2488 bpage = cpu_buffer->tail_page;
2489
2490 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2491 unsigned long write_mask =
2492 local_read(&bpage->write) & ~RB_WRITE_MASK;
2493 unsigned long event_length = rb_event_length(event);
2494
2495
2496
2497
2498
2499
2500 old_index += write_mask;
2501 new_index += write_mask;
2502 index = local_cmpxchg(&bpage->write, old_index, new_index);
2503 if (index == old_index) {
2504
2505 local_sub(event_length, &cpu_buffer->entries_bytes);
2506 return 1;
2507 }
2508 }
2509
2510
2511 return 0;
2512}
2513
2514static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2515{
2516 local_inc(&cpu_buffer->committing);
2517 local_inc(&cpu_buffer->commits);
2518}
2519
2520static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2521{
2522 unsigned long commits;
2523
2524 if (RB_WARN_ON(cpu_buffer,
2525 !local_read(&cpu_buffer->committing)))
2526 return;
2527
2528 again:
2529 commits = local_read(&cpu_buffer->commits);
2530
2531 barrier();
2532 if (local_read(&cpu_buffer->committing) == 1)
2533 rb_set_commit_to_write(cpu_buffer);
2534
2535 local_dec(&cpu_buffer->committing);
2536
2537
2538 barrier();
2539
2540
2541
2542
2543
2544
2545 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2546 !local_read(&cpu_buffer->committing)) {
2547 local_inc(&cpu_buffer->committing);
2548 goto again;
2549 }
2550}
2551
2552static struct ring_buffer_event *
2553rb_reserve_next_event(struct ring_buffer *buffer,
2554 struct ring_buffer_per_cpu *cpu_buffer,
2555 unsigned long length)
2556{
2557 struct ring_buffer_event *event;
2558 u64 ts, delta;
2559 int nr_loops = 0;
2560 int add_timestamp;
2561 u64 diff;
2562
2563 rb_start_commit(cpu_buffer);
2564
2565#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2566
2567
2568
2569
2570
2571
2572 barrier();
2573 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2574 local_dec(&cpu_buffer->committing);
2575 local_dec(&cpu_buffer->commits);
2576 return NULL;
2577 }
2578#endif
2579
2580 length = rb_calculate_event_length(length);
2581 again:
2582 add_timestamp = 0;
2583 delta = 0;
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2595 goto out_fail;
2596
2597 ts = rb_time_stamp(cpu_buffer->buffer);
2598 diff = ts - cpu_buffer->write_stamp;
2599
2600
2601 barrier();
2602
2603
2604 if (likely(ts >= cpu_buffer->write_stamp)) {
2605 delta = diff;
2606 if (unlikely(test_time_stamp(delta))) {
2607 int local_clock_stable = 1;
2608#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2609 local_clock_stable = sched_clock_stable();
2610#endif
2611 WARN_ONCE(delta > (1ULL << 59),
2612 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2613 (unsigned long long)delta,
2614 (unsigned long long)ts,
2615 (unsigned long long)cpu_buffer->write_stamp,
2616 local_clock_stable ? "" :
2617 "If you just came from a suspend/resume,\n"
2618 "please switch to the trace global clock:\n"
2619 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2620 add_timestamp = 1;
2621 }
2622 }
2623
2624 event = __rb_reserve_next(cpu_buffer, length, ts,
2625 delta, add_timestamp);
2626 if (unlikely(PTR_ERR(event) == -EAGAIN))
2627 goto again;
2628
2629 if (!event)
2630 goto out_fail;
2631
2632 return event;
2633
2634 out_fail:
2635 rb_end_commit(cpu_buffer);
2636 return NULL;
2637}
2638
2639#ifdef CONFIG_TRACING
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678static DEFINE_PER_CPU(unsigned int, current_context);
2679
2680static __always_inline int trace_recursive_lock(void)
2681{
2682 unsigned int val = __this_cpu_read(current_context);
2683 int bit;
2684
2685 if (in_interrupt()) {
2686 if (in_nmi())
2687 bit = 0;
2688 else if (in_irq())
2689 bit = 1;
2690 else
2691 bit = 2;
2692 } else
2693 bit = 3;
2694
2695 if (unlikely(val & (1 << bit)))
2696 return 1;
2697
2698 val |= (1 << bit);
2699 __this_cpu_write(current_context, val);
2700
2701 return 0;
2702}
2703
2704static __always_inline void trace_recursive_unlock(void)
2705{
2706 __this_cpu_and(current_context, __this_cpu_read(current_context) - 1);
2707}
2708
2709#else
2710
2711#define trace_recursive_lock() (0)
2712#define trace_recursive_unlock() do { } while (0)
2713
2714#endif
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731struct ring_buffer_event *
2732ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2733{
2734 struct ring_buffer_per_cpu *cpu_buffer;
2735 struct ring_buffer_event *event;
2736 int cpu;
2737
2738 if (ring_buffer_flags != RB_BUFFERS_ON)
2739 return NULL;
2740
2741
2742 preempt_disable_notrace();
2743
2744 if (atomic_read(&buffer->record_disabled))
2745 goto out_nocheck;
2746
2747 if (trace_recursive_lock())
2748 goto out_nocheck;
2749
2750 cpu = raw_smp_processor_id();
2751
2752 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2753 goto out;
2754
2755 cpu_buffer = buffer->buffers[cpu];
2756
2757 if (atomic_read(&cpu_buffer->record_disabled))
2758 goto out;
2759
2760 if (length > BUF_MAX_DATA_SIZE)
2761 goto out;
2762
2763 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2764 if (!event)
2765 goto out;
2766
2767 return event;
2768
2769 out:
2770 trace_recursive_unlock();
2771
2772 out_nocheck:
2773 preempt_enable_notrace();
2774 return NULL;
2775}
2776EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
2777
2778static void
2779rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2780 struct ring_buffer_event *event)
2781{
2782 u64 delta;
2783
2784
2785
2786
2787
2788 if (rb_event_is_commit(cpu_buffer, event)) {
2789
2790
2791
2792
2793 if (!rb_event_index(event))
2794 cpu_buffer->write_stamp =
2795 cpu_buffer->commit_page->page->time_stamp;
2796 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2797 delta = event->array[0];
2798 delta <<= TS_SHIFT;
2799 delta += event->time_delta;
2800 cpu_buffer->write_stamp += delta;
2801 } else
2802 cpu_buffer->write_stamp += event->time_delta;
2803 }
2804}
2805
2806static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2807 struct ring_buffer_event *event)
2808{
2809 local_inc(&cpu_buffer->entries);
2810 rb_update_write_stamp(cpu_buffer, event);
2811 rb_end_commit(cpu_buffer);
2812}
2813
2814static __always_inline void
2815rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2816{
2817 bool pagebusy;
2818
2819 if (buffer->irq_work.waiters_pending) {
2820 buffer->irq_work.waiters_pending = false;
2821
2822 irq_work_queue(&buffer->irq_work.work);
2823 }
2824
2825 if (cpu_buffer->irq_work.waiters_pending) {
2826 cpu_buffer->irq_work.waiters_pending = false;
2827
2828 irq_work_queue(&cpu_buffer->irq_work.work);
2829 }
2830
2831 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2832
2833 if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2834 cpu_buffer->irq_work.wakeup_full = true;
2835 cpu_buffer->irq_work.full_waiters_pending = false;
2836
2837 irq_work_queue(&cpu_buffer->irq_work.work);
2838 }
2839}
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2851 struct ring_buffer_event *event)
2852{
2853 struct ring_buffer_per_cpu *cpu_buffer;
2854 int cpu = raw_smp_processor_id();
2855
2856 cpu_buffer = buffer->buffers[cpu];
2857
2858 rb_commit(cpu_buffer, event);
2859
2860 rb_wakeups(buffer, cpu_buffer);
2861
2862 trace_recursive_unlock();
2863
2864 preempt_enable_notrace();
2865
2866 return 0;
2867}
2868EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2869
2870static inline void rb_event_discard(struct ring_buffer_event *event)
2871{
2872 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2873 event = skip_time_extend(event);
2874
2875
2876 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2877 event->type_len = RINGBUF_TYPE_PADDING;
2878
2879 if (!event->time_delta)
2880 event->time_delta = 1;
2881}
2882
2883
2884
2885
2886
2887
2888
2889static inline void
2890rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2891 struct ring_buffer_event *event)
2892{
2893 unsigned long addr = (unsigned long)event;
2894 struct buffer_page *bpage = cpu_buffer->commit_page;
2895 struct buffer_page *start;
2896
2897 addr &= PAGE_MASK;
2898
2899
2900 if (likely(bpage->page == (void *)addr)) {
2901 local_dec(&bpage->entries);
2902 return;
2903 }
2904
2905
2906
2907
2908
2909 rb_inc_page(cpu_buffer, &bpage);
2910 start = bpage;
2911 do {
2912 if (bpage->page == (void *)addr) {
2913 local_dec(&bpage->entries);
2914 return;
2915 }
2916 rb_inc_page(cpu_buffer, &bpage);
2917 } while (bpage != start);
2918
2919
2920 RB_WARN_ON(cpu_buffer, 1);
2921}
2922
/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * Sometimes an event that is in the ring buffer needs to be ignored.
 * This function lets the user discard an event in the ring buffer
 * and then that event will not be read later.
 *
 * This function only works if it is called before the item has been
 * committed. It will try to free the event from the ring buffer
 * if another event has not been added behind it.
 *
 * If another event has been added behind it, it will set the event
 * up as discarded, and perform the commit.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */
2942void ring_buffer_discard_commit(struct ring_buffer *buffer,
2943 struct ring_buffer_event *event)
2944{
2945 struct ring_buffer_per_cpu *cpu_buffer;
2946 int cpu;
2947
2948
2949 rb_event_discard(event);
2950
2951 cpu = smp_processor_id();
2952 cpu_buffer = buffer->buffers[cpu];
2953
2954
2955
2956
2957
2958
2959 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
2960
2961 rb_decrement_entry(cpu_buffer, event);
2962 if (rb_try_to_discard(cpu_buffer, event))
2963 goto out;
2964
2965
2966
2967
2968
2969 rb_update_write_stamp(cpu_buffer, event);
2970 out:
2971 rb_end_commit(cpu_buffer);
2972
2973 trace_recursive_unlock();
2974
2975 preempt_enable_notrace();
2976
2977}
2978EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
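/*
 * Example (not from this file): a sketch of discarding a reserved event
 * instead of committing it, e.g. when a post-reserve filter rejects the
 * data. my_buffer, fill_data() and my_filter_match() are illustrative
 * assumptions.
 *
 *	event = ring_buffer_lock_reserve(my_buffer, size);
 *	if (!event)
 *		return;
 *	data = ring_buffer_event_data(event);
 *	fill_data(data);
 *	if (my_filter_match(data))
 *		ring_buffer_unlock_commit(my_buffer, event);
 *	else
 *		ring_buffer_discard_commit(my_buffer, event);
 */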
2979
/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 *
 * Note, like ring_buffer_lock_reserve, the length is the length of the data
 * and not the actual length of the event that contains the data.
 */
2993int ring_buffer_write(struct ring_buffer *buffer,
2994 unsigned long length,
2995 void *data)
2996{
2997 struct ring_buffer_per_cpu *cpu_buffer;
2998 struct ring_buffer_event *event;
2999 void *body;
3000 int ret = -EBUSY;
3001 int cpu;
3002
3003 if (ring_buffer_flags != RB_BUFFERS_ON)
3004 return -EBUSY;
3005
3006 preempt_disable_notrace();
3007
3008 if (atomic_read(&buffer->record_disabled))
3009 goto out;
3010
3011 cpu = raw_smp_processor_id();
3012
3013 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3014 goto out;
3015
3016 cpu_buffer = buffer->buffers[cpu];
3017
3018 if (atomic_read(&cpu_buffer->record_disabled))
3019 goto out;
3020
3021 if (length > BUF_MAX_DATA_SIZE)
3022 goto out;
3023
3024 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3025 if (!event)
3026 goto out;
3027
3028 body = rb_event_data(event);
3029
3030 memcpy(body, data, length);
3031
3032 rb_commit(cpu_buffer, event);
3033
3034 rb_wakeups(buffer, cpu_buffer);
3035
3036 ret = 0;
3037 out:
3038 preempt_enable_notrace();
3039
3040 return ret;
3041}
3042EXPORT_SYMBOL_GPL(ring_buffer_write);
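/*
 * Example (not from this file): the one-shot write path, assuming the
 * payload already sits in memory. struct my_record and my_buffer are
 * illustrative assumptions.
 *
 *	struct my_record rec = { .cpu = raw_smp_processor_id(), .val = 7 };
 *
 *	if (ring_buffer_write(my_buffer, sizeof(rec), &rec))
 *		pr_debug("ring buffer write failed\n");
 *
 * A non-zero return means the event was not recorded (buffer off,
 * recording disabled, or the data too large).
 */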
3043
3044static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3045{
3046 struct buffer_page *reader = cpu_buffer->reader_page;
3047 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3048 struct buffer_page *commit = cpu_buffer->commit_page;
3049
3050
3051 if (unlikely(!head))
3052 return 1;
3053
3054 return reader->read == rb_page_commit(reader) &&
3055 (commit == reader ||
3056 (commit == head &&
3057 head->read == rb_page_commit(commit)));
3058}
3059
/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_sched() after this.
 */
3069void ring_buffer_record_disable(struct ring_buffer *buffer)
3070{
3071 atomic_inc(&buffer->record_disabled);
3072}
3073EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3074
/**
 * ring_buffer_record_enable - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
3082void ring_buffer_record_enable(struct ring_buffer *buffer)
3083{
3084 atomic_dec(&buffer->record_disabled);
3085}
3086EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3087
/**
 * ring_buffer_record_off - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * This is different than ring_buffer_record_disable() as
 * it works like an on/off switch, where as the disable() version
 * must be paired with an enable().
 */
3099void ring_buffer_record_off(struct ring_buffer *buffer)
3100{
3101 unsigned int rd;
3102 unsigned int new_rd;
3103
3104 do {
3105 rd = atomic_read(&buffer->record_disabled);
3106 new_rd = rd | RB_BUFFER_OFF;
3107 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3108}
3109EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3110
/**
 * ring_buffer_record_on - restart writes into the buffer
 * @buffer: The ring buffer to start writes to.
 *
 * This enables all writes to the buffer that were disabled by
 * ring_buffer_record_off().
 *
 * This is different than ring_buffer_record_enable() as
 * it works like an on/off switch, where as the enable() version
 * must be paired with a disable().
 */
3122void ring_buffer_record_on(struct ring_buffer *buffer)
3123{
3124 unsigned int rd;
3125 unsigned int new_rd;
3126
3127 do {
3128 rd = atomic_read(&buffer->record_disabled);
3129 new_rd = rd & ~RB_BUFFER_OFF;
3130 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3131}
3132EXPORT_SYMBOL_GPL(ring_buffer_record_on);
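/*
 * Example (not from this file): pausing and resuming recording with the
 * on/off switch while the buffer is inspected. my_buffer is an
 * illustrative assumption.
 *
 *	ring_buffer_record_off(my_buffer);
 *	...	read or dump the buffer with nothing new being written
 *	ring_buffer_record_on(my_buffer);
 *
 * Unlike the disable()/enable() pair above, off()/on() does not nest and
 * does not need to be balanced.
 */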
3133
/**
 * ring_buffer_record_is_on - return true if the ring buffer can write
 * @buffer: The ring buffer to see if write is enabled
 *
 * Returns true if the ring buffer is in a state that it accepts writes.
 */
3140int ring_buffer_record_is_on(struct ring_buffer *buffer)
3141{
3142 return !atomic_read(&buffer->record_disabled);
3143}
3144
/**
 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
 * @buffer: The ring buffer to stop writes to.
 * @cpu: The CPU buffer to stop
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_sched() after this.
 */
3155void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
3156{
3157 struct ring_buffer_per_cpu *cpu_buffer;
3158
3159 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3160 return;
3161
3162 cpu_buffer = buffer->buffers[cpu];
3163 atomic_inc(&cpu_buffer->record_disabled);
3164}
3165EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
3166
/**
 * ring_buffer_record_enable_cpu - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 * @cpu: The CPU to enable.
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */
3175void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
3176{
3177 struct ring_buffer_per_cpu *cpu_buffer;
3178
3179 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3180 return;
3181
3182 cpu_buffer = buffer->buffers[cpu];
3183 atomic_dec(&cpu_buffer->record_disabled);
3184}
3185EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
3186
/*
 * The total entries in the ring buffer is the running counter
 * of entries entered into the ring buffer, minus the sum of
 * the entries read from the ring buffer and the number of
 * entries that were overwritten.
 */
3193static inline unsigned long
3194rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3195{
3196 return local_read(&cpu_buffer->entries) -
3197 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3198}
3199
/**
 * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
3205u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
3206{
3207 unsigned long flags;
3208 struct ring_buffer_per_cpu *cpu_buffer;
3209 struct buffer_page *bpage;
3210 u64 ret = 0;
3211
3212 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3213 return 0;
3214
3215 cpu_buffer = buffer->buffers[cpu];
3216 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3217
3218
3219
3220
3221 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3222 bpage = cpu_buffer->reader_page;
3223 else
3224 bpage = rb_set_head_page(cpu_buffer);
3225 if (bpage)
3226 ret = bpage->page->time_stamp;
3227 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3228
3229 return ret;
3230}
3231EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3232
/**
 * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
3238unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
3239{
3240 struct ring_buffer_per_cpu *cpu_buffer;
3241 unsigned long ret;
3242
3243 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3244 return 0;
3245
3246 cpu_buffer = buffer->buffers[cpu];
3247 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3248
3249 return ret;
3250}
3251EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3252
/**
 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the entries from.
 */
3258unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
3259{
3260 struct ring_buffer_per_cpu *cpu_buffer;
3261
3262 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3263 return 0;
3264
3265 cpu_buffer = buffer->buffers[cpu];
3266
3267 return rb_num_of_entries(cpu_buffer);
3268}
3269EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3270
/**
 * ring_buffer_overrun_cpu - get the number of overruns caused by the ring
 *	buffer wrapping around (only if RB_FL_OVERWRITE is on).
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
3277unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
3278{
3279 struct ring_buffer_per_cpu *cpu_buffer;
3280 unsigned long ret;
3281
3282 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3283 return 0;
3284
3285 cpu_buffer = buffer->buffers[cpu];
3286 ret = local_read(&cpu_buffer->overrun);
3287
3288 return ret;
3289}
3290EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3291
/**
 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
 *	commits failing due to the buffer wrapping around while there are
 *	uncommitted events, such as during an interrupt storm.
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
3299unsigned long
3300ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3301{
3302 struct ring_buffer_per_cpu *cpu_buffer;
3303 unsigned long ret;
3304
3305 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3306 return 0;
3307
3308 cpu_buffer = buffer->buffers[cpu];
3309 ret = local_read(&cpu_buffer->commit_overrun);
3310
3311 return ret;
3312}
3313EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3314
/**
 * ring_buffer_dropped_events_cpu - get the number of dropped events caused by
 *	the ring buffer filling up (only if RB_FL_OVERWRITE is off).
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of dropped events from
 */
3321unsigned long
3322ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3323{
3324 struct ring_buffer_per_cpu *cpu_buffer;
3325 unsigned long ret;
3326
3327 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3328 return 0;
3329
3330 cpu_buffer = buffer->buffers[cpu];
3331 ret = local_read(&cpu_buffer->dropped_events);
3332
3333 return ret;
3334}
3335EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3336
/**
 * ring_buffer_read_events_cpu - get the number of events successfully read
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of events read
 */
3342unsigned long
3343ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3344{
3345 struct ring_buffer_per_cpu *cpu_buffer;
3346
3347 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3348 return 0;
3349
3350 cpu_buffer = buffer->buffers[cpu];
3351 return cpu_buffer->read;
3352}
3353EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3354
/**
 * ring_buffer_entries - get the number of entries in a buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of entries in the ring buffer
 * (all CPU entries)
 */
3362unsigned long ring_buffer_entries(struct ring_buffer *buffer)
3363{
3364 struct ring_buffer_per_cpu *cpu_buffer;
3365 unsigned long entries = 0;
3366 int cpu;
3367
3368
3369 for_each_buffer_cpu(buffer, cpu) {
3370 cpu_buffer = buffer->buffers[cpu];
3371 entries += rb_num_of_entries(cpu_buffer);
3372 }
3373
3374 return entries;
3375}
3376EXPORT_SYMBOL_GPL(ring_buffer_entries);
3377
/**
 * ring_buffer_overruns - get the number of overruns in buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of overruns in the ring buffer
 * (all CPU entries)
 */
3385unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
3386{
3387 struct ring_buffer_per_cpu *cpu_buffer;
3388 unsigned long overruns = 0;
3389 int cpu;
3390
3391
3392 for_each_buffer_cpu(buffer, cpu) {
3393 cpu_buffer = buffer->buffers[cpu];
3394 overruns += local_read(&cpu_buffer->overrun);
3395 }
3396
3397 return overruns;
3398}
3399EXPORT_SYMBOL_GPL(ring_buffer_overruns);
3400
3401static void rb_iter_reset(struct ring_buffer_iter *iter)
3402{
3403 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3404
3405
3406 iter->head_page = cpu_buffer->reader_page;
3407 iter->head = cpu_buffer->reader_page->read;
3408
3409 iter->cache_reader_page = iter->head_page;
3410 iter->cache_read = cpu_buffer->read;
3411
3412 if (iter->head)
3413 iter->read_stamp = cpu_buffer->read_stamp;
3414 else
3415 iter->read_stamp = iter->head_page->page->time_stamp;
3416}
3417
/**
 * ring_buffer_iter_reset - reset an iterator
 * @iter: The iterator to reset
 *
 * Resets the iterator, so that it will start from the beginning
 * again.
 */
3425void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3426{
3427 struct ring_buffer_per_cpu *cpu_buffer;
3428 unsigned long flags;
3429
3430 if (!iter)
3431 return;
3432
3433 cpu_buffer = iter->cpu_buffer;
3434
3435 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3436 rb_iter_reset(iter);
3437 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3438}
3439EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3440
/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 */
3445int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3446{
3447 struct ring_buffer_per_cpu *cpu_buffer;
3448
3449 cpu_buffer = iter->cpu_buffer;
3450
3451 return iter->head_page == cpu_buffer->commit_page &&
3452 iter->head == rb_commit_index(cpu_buffer);
3453}
3454EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3455
3456static void
3457rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3458 struct ring_buffer_event *event)
3459{
3460 u64 delta;
3461
3462 switch (event->type_len) {
3463 case RINGBUF_TYPE_PADDING:
3464 return;
3465
3466 case RINGBUF_TYPE_TIME_EXTEND:
3467 delta = event->array[0];
3468 delta <<= TS_SHIFT;
3469 delta += event->time_delta;
3470 cpu_buffer->read_stamp += delta;
3471 return;
3472
3473 case RINGBUF_TYPE_TIME_STAMP:
3474
3475 return;
3476
3477 case RINGBUF_TYPE_DATA:
3478 cpu_buffer->read_stamp += event->time_delta;
3479 return;
3480
3481 default:
3482 BUG();
3483 }
3484 return;
3485}
3486
3487static void
3488rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3489 struct ring_buffer_event *event)
3490{
3491 u64 delta;
3492
3493 switch (event->type_len) {
3494 case RINGBUF_TYPE_PADDING:
3495 return;
3496
3497 case RINGBUF_TYPE_TIME_EXTEND:
3498 delta = event->array[0];
3499 delta <<= TS_SHIFT;
3500 delta += event->time_delta;
3501 iter->read_stamp += delta;
3502 return;
3503
3504 case RINGBUF_TYPE_TIME_STAMP:
3505
3506 return;
3507
3508 case RINGBUF_TYPE_DATA:
3509 iter->read_stamp += event->time_delta;
3510 return;
3511
3512 default:
3513 BUG();
3514 }
3515 return;
3516}
3517
3518static struct buffer_page *
3519rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3520{
3521 struct buffer_page *reader = NULL;
3522 unsigned long overwrite;
3523 unsigned long flags;
3524 int nr_loops = 0;
3525 int ret;
3526
3527 local_irq_save(flags);
3528 arch_spin_lock(&cpu_buffer->lock);
3529
3530 again:
3531
3532
3533
3534
3535
3536
3537 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3538 reader = NULL;
3539 goto out;
3540 }
3541
3542 reader = cpu_buffer->reader_page;
3543
3544
3545 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3546 goto out;
3547
3548
3549 if (RB_WARN_ON(cpu_buffer,
3550 cpu_buffer->reader_page->read > rb_page_size(reader)))
3551 goto out;
3552
3553
3554 reader = NULL;
3555 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3556 goto out;
3557
3558
3559 if (rb_num_of_entries(cpu_buffer) == 0)
3560 goto out;
3561
3562
3563
3564
3565 local_set(&cpu_buffer->reader_page->write, 0);
3566 local_set(&cpu_buffer->reader_page->entries, 0);
3567 local_set(&cpu_buffer->reader_page->page->commit, 0);
3568 cpu_buffer->reader_page->real_end = 0;
3569
3570 spin:
3571
3572
3573
3574 reader = rb_set_head_page(cpu_buffer);
3575 if (!reader)
3576 goto out;
3577 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3578 cpu_buffer->reader_page->list.prev = reader->list.prev;
3579
3580
3581
3582
3583
3584
3585 cpu_buffer->pages = reader->list.prev;
3586
3587
3588 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599 smp_mb();
3600 overwrite = local_read(&(cpu_buffer->overrun));
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3614
3615
3616
3617
3618 if (!ret)
3619 goto spin;
3620
3621
3622
3623
3624
3625
3626 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3627 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3628
3629
3630 cpu_buffer->reader_page = reader;
3631 rb_reset_reader_page(cpu_buffer);
3632
3633 if (overwrite != cpu_buffer->last_overrun) {
3634 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3635 cpu_buffer->last_overrun = overwrite;
3636 }
3637
3638 goto again;
3639
3640 out:
3641 arch_spin_unlock(&cpu_buffer->lock);
3642 local_irq_restore(flags);
3643
3644 return reader;
3645}
3646
3647static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3648{
3649 struct ring_buffer_event *event;
3650 struct buffer_page *reader;
3651 unsigned length;
3652
3653 reader = rb_get_reader_page(cpu_buffer);
3654
3655
3656 if (RB_WARN_ON(cpu_buffer, !reader))
3657 return;
3658
3659 event = rb_reader_event(cpu_buffer);
3660
3661 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3662 cpu_buffer->read++;
3663
3664 rb_update_read_stamp(cpu_buffer, event);
3665
3666 length = rb_event_length(event);
3667 cpu_buffer->reader_page->read += length;
3668}
3669
3670static void rb_advance_iter(struct ring_buffer_iter *iter)
3671{
3672 struct ring_buffer_per_cpu *cpu_buffer;
3673 struct ring_buffer_event *event;
3674 unsigned length;
3675
3676 cpu_buffer = iter->cpu_buffer;
3677
3678
3679
3680
3681 if (iter->head >= rb_page_size(iter->head_page)) {
3682
3683 if (iter->head_page == cpu_buffer->commit_page)
3684 return;
3685 rb_inc_iter(iter);
3686 return;
3687 }
3688
3689 event = rb_iter_head_event(iter);
3690
3691 length = rb_event_length(event);
3692
3693
3694
3695
3696
3697 if (RB_WARN_ON(cpu_buffer,
3698 (iter->head_page == cpu_buffer->commit_page) &&
3699 (iter->head + length > rb_commit_index(cpu_buffer))))
3700 return;
3701
3702 rb_update_iter_read_stamp(iter, event);
3703
3704 iter->head += length;
3705
3706
3707 if ((iter->head >= rb_page_size(iter->head_page)) &&
3708 (iter->head_page != cpu_buffer->commit_page))
3709 rb_inc_iter(iter);
3710}
3711
3712static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3713{
3714 return cpu_buffer->lost_events;
3715}
3716
3717static struct ring_buffer_event *
3718rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3719 unsigned long *lost_events)
3720{
3721 struct ring_buffer_event *event;
3722 struct buffer_page *reader;
3723 int nr_loops = 0;
3724
3725 again:
3726
3727
3728
3729
3730
3731
3732 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3733 return NULL;
3734
3735 reader = rb_get_reader_page(cpu_buffer);
3736 if (!reader)
3737 return NULL;
3738
3739 event = rb_reader_event(cpu_buffer);
3740
3741 switch (event->type_len) {
3742 case RINGBUF_TYPE_PADDING:
3743 if (rb_null_event(event))
3744 RB_WARN_ON(cpu_buffer, 1);
3745
3746
3747
3748
3749
3750
3751
3752
3753 return event;
3754
3755 case RINGBUF_TYPE_TIME_EXTEND:
3756
3757 rb_advance_reader(cpu_buffer);
3758 goto again;
3759
3760 case RINGBUF_TYPE_TIME_STAMP:
3761
3762 rb_advance_reader(cpu_buffer);
3763 goto again;
3764
3765 case RINGBUF_TYPE_DATA:
3766 if (ts) {
3767 *ts = cpu_buffer->read_stamp + event->time_delta;
3768 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3769 cpu_buffer->cpu, ts);
3770 }
3771 if (lost_events)
3772 *lost_events = rb_lost_events(cpu_buffer);
3773 return event;
3774
3775 default:
3776 BUG();
3777 }
3778
3779 return NULL;
3780}
3781EXPORT_SYMBOL_GPL(ring_buffer_peek);
3782
3783static struct ring_buffer_event *
3784rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3785{
3786 struct ring_buffer *buffer;
3787 struct ring_buffer_per_cpu *cpu_buffer;
3788 struct ring_buffer_event *event;
3789 int nr_loops = 0;
3790
3791 cpu_buffer = iter->cpu_buffer;
3792 buffer = cpu_buffer->buffer;
3793
3794
3795
3796
3797
3798
3799 if (unlikely(iter->cache_read != cpu_buffer->read ||
3800 iter->cache_reader_page != cpu_buffer->reader_page))
3801 rb_iter_reset(iter);
3802
3803 again:
3804 if (ring_buffer_iter_empty(iter))
3805 return NULL;
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
3816 return NULL;
3817
3818 if (rb_per_cpu_empty(cpu_buffer))
3819 return NULL;
3820
3821 if (iter->head >= rb_page_size(iter->head_page)) {
3822 rb_inc_iter(iter);
3823 goto again;
3824 }
3825
3826 event = rb_iter_head_event(iter);
3827
3828 switch (event->type_len) {
3829 case RINGBUF_TYPE_PADDING:
3830 if (rb_null_event(event)) {
3831 rb_inc_iter(iter);
3832 goto again;
3833 }
3834 rb_advance_iter(iter);
3835 return event;
3836
3837 case RINGBUF_TYPE_TIME_EXTEND:
3838
3839 rb_advance_iter(iter);
3840 goto again;
3841
3842 case RINGBUF_TYPE_TIME_STAMP:
3843
3844 rb_advance_iter(iter);
3845 goto again;
3846
3847 case RINGBUF_TYPE_DATA:
3848 if (ts) {
3849 *ts = iter->read_stamp + event->time_delta;
3850 ring_buffer_normalize_time_stamp(buffer,
3851 cpu_buffer->cpu, ts);
3852 }
3853 return event;
3854
3855 default:
3856 BUG();
3857 }
3858
3859 return NULL;
3860}
3861EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
3862
3863static inline int rb_ok_to_lock(void)
3864{
	/*
	 * If we are in NMI context the reader lock can not be taken
	 * safely: the NMI may have interrupted a task that already
	 * holds it. In that case do the read without the lock, but
	 * permanently disable the ring buffer since the unlocked
	 * read may corrupt it.
	 */
3871 if (likely(!in_nmi()))
3872 return 1;
3873
3874 tracing_off_permanent();
3875 return 0;
3876}
3877
/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
3888struct ring_buffer_event *
3889ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3890 unsigned long *lost_events)
3891{
3892 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3893 struct ring_buffer_event *event;
3894 unsigned long flags;
3895 int dolock;
3896
3897 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3898 return NULL;
3899
3900 dolock = rb_ok_to_lock();
3901 again:
3902 local_irq_save(flags);
3903 if (dolock)
3904 raw_spin_lock(&cpu_buffer->reader_lock);
3905 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3906 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3907 rb_advance_reader(cpu_buffer);
3908 if (dolock)
3909 raw_spin_unlock(&cpu_buffer->reader_lock);
3910 local_irq_restore(flags);
3911
3912 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3913 goto again;
3914
3915 return event;
3916}
3917
/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not increment the iterator.
 */
3926struct ring_buffer_event *
3927ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3928{
3929 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3930 struct ring_buffer_event *event;
3931 unsigned long flags;
3932
3933 again:
3934 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3935 event = rb_iter_peek(iter, ts);
3936 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3937
3938 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3939 goto again;
3940
3941 return event;
3942}
3943
/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */
3955struct ring_buffer_event *
3956ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3957 unsigned long *lost_events)
3958{
3959 struct ring_buffer_per_cpu *cpu_buffer;
3960 struct ring_buffer_event *event = NULL;
3961 unsigned long flags;
3962 int dolock;
3963
3964 dolock = rb_ok_to_lock();
3965
3966 again:
3967
3968 preempt_disable();
3969
3970 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3971 goto out;
3972
3973 cpu_buffer = buffer->buffers[cpu];
3974 local_irq_save(flags);
3975 if (dolock)
3976 raw_spin_lock(&cpu_buffer->reader_lock);
3977
3978 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3979 if (event) {
3980 cpu_buffer->lost_events = 0;
3981 rb_advance_reader(cpu_buffer);
3982 }
3983
3984 if (dolock)
3985 raw_spin_unlock(&cpu_buffer->reader_lock);
3986 local_irq_restore(flags);
3987
3988 out:
3989 preempt_enable();
3990
3991 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3992 goto again;
3993
3994 return event;
3995}
3996EXPORT_SYMBOL_GPL(ring_buffer_consume);
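/*
 * Example (not from this file): a consuming-read loop for one CPU. Each
 * call returns the next unread event and advances past it. my_buffer and
 * process_event() are illustrative assumptions.
 *
 *	struct ring_buffer_event *event;
 *	unsigned long lost;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(my_buffer, cpu, &ts, &lost))) {
 *		if (lost)
 *			pr_warn("%lu events lost\n", lost);
 *		process_event(ring_buffer_event_data(event),
 *			      ring_buffer_event_length(event), ts);
 *	}
 */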
3997
/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 *
 * This performs the initial preparations necessary to iterate
 * through the buffer. Memory is allocated, buffer recording
 * is disabled, and the iterator pointer is returned to the caller.
 *
 * Disabling buffer recording prevents the reading from being
 * corrupted. This is not a consuming read, so a producer is not
 * expected.
 *
 * After a sequence of ring_buffer_read_prepare calls, the user is
 * expected to make at least one call to ring_buffer_read_prepare_sync.
 * Afterwards, ring_buffer_read_start is invoked to get things going
 * for real.
 *
 * This overall must be paired with ring_buffer_read_finish.
 */
4018struct ring_buffer_iter *
4019ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
4020{
4021 struct ring_buffer_per_cpu *cpu_buffer;
4022 struct ring_buffer_iter *iter;
4023
4024 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4025 return NULL;
4026
4027 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
4028 if (!iter)
4029 return NULL;
4030
4031 cpu_buffer = buffer->buffers[cpu];
4032
4033 iter->cpu_buffer = cpu_buffer;
4034
4035 atomic_inc(&buffer->resize_disabled);
4036 atomic_inc(&cpu_buffer->record_disabled);
4037
4038 return iter;
4039}
4040EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
4041
/**
 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
 *
 * All previously invoked ring_buffer_read_prepare calls to prepare
 * iterators will be synchronized. Afterwards, ring_buffer_read_start
 * calls on those iterators are allowed.
 */
4049void
4050ring_buffer_read_prepare_sync(void)
4051{
4052 synchronize_sched();
4053}
4054EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4055
/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @iter: The iterator returned by ring_buffer_read_prepare
 *
 * This finalizes the startup of an iteration through the buffer.
 * The iterator comes from a call to ring_buffer_read_prepare and
 * an intervening ring_buffer_read_prepare_sync must have been
 * performed.
 *
 * Must be paired with ring_buffer_read_finish.
 */
4067void
4068ring_buffer_read_start(struct ring_buffer_iter *iter)
4069{
4070 struct ring_buffer_per_cpu *cpu_buffer;
4071 unsigned long flags;
4072
4073 if (!iter)
4074 return;
4075
4076 cpu_buffer = iter->cpu_buffer;
4077
4078 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4079 arch_spin_lock(&cpu_buffer->lock);
4080 rb_iter_reset(iter);
4081 arch_spin_unlock(&cpu_buffer->lock);
4082 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4083}
4084EXPORT_SYMBOL_GPL(ring_buffer_read_start);
4085
/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_read_prepare
 *
 * This re-enables the recording to the buffer, and frees the
 * iterator.
 */
4093void
4094ring_buffer_read_finish(struct ring_buffer_iter *iter)
4095{
4096 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4097 unsigned long flags;
4098
4099
4100
4101
4102
4103
4104
4105 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4106 rb_check_pages(cpu_buffer);
4107 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4108
4109 atomic_dec(&cpu_buffer->record_disabled);
4110 atomic_dec(&cpu_buffer->buffer->resize_disabled);
4111 kfree(iter);
4112}
4113EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
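/*
 * Example (not from this file): the four steps of a non-consuming read
 * for one CPU, error handling trimmed. my_buffer and process_event() are
 * illustrative assumptions.
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_prepare(my_buffer, cpu);
 *	ring_buffer_read_prepare_sync();
 *	ring_buffer_read_start(iter);
 *	while ((event = ring_buffer_read(iter, &ts)))
 *		process_event(ring_buffer_event_data(event), ts);
 *	ring_buffer_read_finish(iter);
 */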
4114
/**
 * ring_buffer_read - read the next item in the ring buffer by the iterator
 * @iter: The ring buffer iterator
 * @ts: The time stamp of the event read.
 *
 * This reads the next event in the ring buffer and increments the iterator.
 */
4122struct ring_buffer_event *
4123ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
4124{
4125 struct ring_buffer_event *event;
4126 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4127 unsigned long flags;
4128
4129 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4130 again:
4131 event = rb_iter_peek(iter, ts);
4132 if (!event)
4133 goto out;
4134
4135 if (event->type_len == RINGBUF_TYPE_PADDING)
4136 goto again;
4137
4138 rb_advance_iter(iter);
4139 out:
4140 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4141
4142 return event;
4143}
4144EXPORT_SYMBOL_GPL(ring_buffer_read);
4145
/**
 * ring_buffer_size - return the size of the ring buffer (in bytes)
 * @buffer: The ring buffer.
 * @cpu: The CPU to get ring buffer size from.
 */
4150unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
4151{
	/*
	 * Earlier, this method returned
	 *	BUF_PAGE_SIZE * buffer->nr_pages
	 * Since the nr_pages field is now removed, we have converted this to
	 *	return the per cpu buffer value.
	 */
4158 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4159 return 0;
4160
4161 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
4162}
4163EXPORT_SYMBOL_GPL(ring_buffer_size);
4164
4165static void
4166rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
4167{
4168 rb_head_page_deactivate(cpu_buffer);
4169
4170 cpu_buffer->head_page
4171 = list_entry(cpu_buffer->pages, struct buffer_page, list);
4172 local_set(&cpu_buffer->head_page->write, 0);
4173 local_set(&cpu_buffer->head_page->entries, 0);
4174 local_set(&cpu_buffer->head_page->page->commit, 0);
4175
4176 cpu_buffer->head_page->read = 0;
4177
4178 cpu_buffer->tail_page = cpu_buffer->head_page;
4179 cpu_buffer->commit_page = cpu_buffer->head_page;
4180
4181 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
4182 INIT_LIST_HEAD(&cpu_buffer->new_pages);
4183 local_set(&cpu_buffer->reader_page->write, 0);
4184 local_set(&cpu_buffer->reader_page->entries, 0);
4185 local_set(&cpu_buffer->reader_page->page->commit, 0);
4186 cpu_buffer->reader_page->read = 0;
4187
4188 local_set(&cpu_buffer->entries_bytes, 0);
4189 local_set(&cpu_buffer->overrun, 0);
4190 local_set(&cpu_buffer->commit_overrun, 0);
4191 local_set(&cpu_buffer->dropped_events, 0);
4192 local_set(&cpu_buffer->entries, 0);
4193 local_set(&cpu_buffer->committing, 0);
4194 local_set(&cpu_buffer->commits, 0);
4195 cpu_buffer->read = 0;
4196 cpu_buffer->read_bytes = 0;
4197
4198 cpu_buffer->write_stamp = 0;
4199 cpu_buffer->read_stamp = 0;
4200
4201 cpu_buffer->lost_events = 0;
4202 cpu_buffer->last_overrun = 0;
4203
4204 rb_head_page_activate(cpu_buffer);
4205}
4206
/**
 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 * @cpu: The CPU buffer to be reset
 */
4212void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
4213{
4214 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4215 unsigned long flags;
4216
4217 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4218 return;
4219
4220 atomic_inc(&buffer->resize_disabled);
4221 atomic_inc(&cpu_buffer->record_disabled);
4222
4223
4224 synchronize_sched();
4225
4226 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4227
4228 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
4229 goto out;
4230
4231 arch_spin_lock(&cpu_buffer->lock);
4232
4233 rb_reset_cpu(cpu_buffer);
4234
4235 arch_spin_unlock(&cpu_buffer->lock);
4236
4237 out:
4238 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4239
4240 atomic_dec(&cpu_buffer->record_disabled);
4241 atomic_dec(&buffer->resize_disabled);
4242}
4243EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
4244
/**
 * ring_buffer_reset - reset a ring buffer
 * @buffer: The ring buffer to reset all cpu buffers
 */
4249void ring_buffer_reset(struct ring_buffer *buffer)
4250{
4251 int cpu;
4252
4253 for_each_buffer_cpu(buffer, cpu)
4254 ring_buffer_reset_cpu(buffer, cpu);
4255}
4256EXPORT_SYMBOL_GPL(ring_buffer_reset);
4257
/**
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */
4262int ring_buffer_empty(struct ring_buffer *buffer)
4263{
4264 struct ring_buffer_per_cpu *cpu_buffer;
4265 unsigned long flags;
4266 int dolock;
4267 int cpu;
4268 int ret;
4269
4270 dolock = rb_ok_to_lock();
4271
4272
4273 for_each_buffer_cpu(buffer, cpu) {
4274 cpu_buffer = buffer->buffers[cpu];
4275 local_irq_save(flags);
4276 if (dolock)
4277 raw_spin_lock(&cpu_buffer->reader_lock);
4278 ret = rb_per_cpu_empty(cpu_buffer);
4279 if (dolock)
4280 raw_spin_unlock(&cpu_buffer->reader_lock);
4281 local_irq_restore(flags);
4282
4283 if (!ret)
4284 return 0;
4285 }
4286
4287 return 1;
4288}
4289EXPORT_SYMBOL_GPL(ring_buffer_empty);
4290
/**
 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */
4296int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
4297{
4298 struct ring_buffer_per_cpu *cpu_buffer;
4299 unsigned long flags;
4300 int dolock;
4301 int ret;
4302
4303 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4304 return 1;
4305
4306 dolock = rb_ok_to_lock();
4307
4308 cpu_buffer = buffer->buffers[cpu];
4309 local_irq_save(flags);
4310 if (dolock)
4311 raw_spin_lock(&cpu_buffer->reader_lock);
4312 ret = rb_per_cpu_empty(cpu_buffer);
4313 if (dolock)
4314 raw_spin_unlock(&cpu_buffer->reader_lock);
4315 local_irq_restore(flags);
4316
4317 return ret;
4318}
4319EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
4320
4321#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and have another back up buffer lying around.
 * It is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */
4332int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4333 struct ring_buffer *buffer_b, int cpu)
4334{
4335 struct ring_buffer_per_cpu *cpu_buffer_a;
4336 struct ring_buffer_per_cpu *cpu_buffer_b;
4337 int ret = -EINVAL;
4338
4339 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4340 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4341 goto out;
4342
4343 cpu_buffer_a = buffer_a->buffers[cpu];
4344 cpu_buffer_b = buffer_b->buffers[cpu];
4345
4346
4347 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4348 goto out;
4349
4350 ret = -EAGAIN;
4351
4352 if (ring_buffer_flags != RB_BUFFERS_ON)
4353 goto out;
4354
4355 if (atomic_read(&buffer_a->record_disabled))
4356 goto out;
4357
4358 if (atomic_read(&buffer_b->record_disabled))
4359 goto out;
4360
4361 if (atomic_read(&cpu_buffer_a->record_disabled))
4362 goto out;
4363
4364 if (atomic_read(&cpu_buffer_b->record_disabled))
4365 goto out;
4366
4367
4368
4369
4370
4371
4372
4373 atomic_inc(&cpu_buffer_a->record_disabled);
4374 atomic_inc(&cpu_buffer_b->record_disabled);
4375
4376 ret = -EBUSY;
4377 if (local_read(&cpu_buffer_a->committing))
4378 goto out_dec;
4379 if (local_read(&cpu_buffer_b->committing))
4380 goto out_dec;
4381
4382 buffer_a->buffers[cpu] = cpu_buffer_b;
4383 buffer_b->buffers[cpu] = cpu_buffer_a;
4384
4385 cpu_buffer_b->buffer = buffer_a;
4386 cpu_buffer_a->buffer = buffer_b;
4387
4388 ret = 0;
4389
4390out_dec:
4391 atomic_dec(&cpu_buffer_a->record_disabled);
4392 atomic_dec(&cpu_buffer_b->record_disabled);
4393out:
4394 return ret;
4395}
4396EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
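/*
 * Example (not from this file): the snapshot use case this helper is
 * meant for. A tracer keeps a spare buffer and swaps in the per-CPU
 * buffer it wants to freeze. live_buffer and snapshot_buffer are
 * illustrative assumptions.
 *
 *	if (ring_buffer_swap_cpu(live_buffer, snapshot_buffer, cpu) == 0) {
 *		// snapshot_buffer now holds what was recorded on @cpu,
 *		// while live_buffer keeps recording into the spare pages.
 *	}
 */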
4397#endif
4398
/**
 * ring_buffer_alloc_read_page - allocate a page to read from buffer
 * @buffer: the buffer to allocate for.
 * @cpu: the cpu buffer to allocate.
 *
 * This function is used in conjunction with ring_buffer_read_page.
 * When reading a full page from the ring buffer, these functions
 * can be used to speed up the process. The calling function should
 * allocate a few pages first with this function. Then when it
 * needs to get pages from the ring buffer, it passes the result
 * of this function into ring_buffer_read_page, which will swap
 * the page that was allocated, with the read page of the buffer.
 *
 * Returns:
 *  The page allocated, or NULL on error.
 */
4415void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
4416{
4417 struct buffer_data_page *bpage;
4418 struct page *page;
4419
4420 page = alloc_pages_node(cpu_to_node(cpu),
4421 GFP_KERNEL | __GFP_NORETRY, 0);
4422 if (!page)
4423 return NULL;
4424
4425 bpage = page_address(page);
4426
4427 rb_init_page(bpage);
4428
4429 return bpage;
4430}
4431EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4432
/**
 * ring_buffer_free_read_page - free an allocated read page
 * @buffer: the buffer the page was allocated for
 * @data: the page to free
 *
 * Free a page allocated from ring_buffer_alloc_read_page.
 */
4440void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
4441{
4442 free_page((unsigned long)data);
4443}
4444EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4445
/**
 * ring_buffer_read_page - extract a page from the ring buffer
 * @buffer: buffer to extract from
 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 * @len: amount to extract
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full.
 *
 * This function will pull out a page from the ring buffer and consume it.
 * @data_page must be the address of the variable that was returned
 * from ring_buffer_alloc_read_page. This is because the page might be used
 * to swap with a page in the ring buffer.
 *
 * for example:
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (!rpage)
 *		return error;
 *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(rpage, ret);
 *
 * When @full is set, the function will not return true unless
 * the writer is off the reader page.
 *
 * Note: it is up to the calling functions to handle sleeps and wakeups.
 *  The ring buffer can be used anywhere in the kernel and can not
 *  blindly call wake_up. The reader of the ring buffer decides how to wake
 *  itself up on the buffer after it has been filled.
 *
 * Returns:
 *  >=0 if data has been transferred, returns the offset of consumed data.
 *  <0 if no data has been transferred.
 */
4479int ring_buffer_read_page(struct ring_buffer *buffer,
4480 void **data_page, size_t len, int cpu, int full)
4481{
4482 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4483 struct ring_buffer_event *event;
4484 struct buffer_data_page *bpage;
4485 struct buffer_page *reader;
4486 unsigned long missed_events;
4487 unsigned long flags;
4488 unsigned int commit;
4489 unsigned int read;
4490 u64 save_timestamp;
4491 int ret = -1;
4492
4493 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4494 goto out;
4495
4496
4497
4498
4499
4500 if (len <= BUF_PAGE_HDR_SIZE)
4501 goto out;
4502
4503 len -= BUF_PAGE_HDR_SIZE;
4504
4505 if (!data_page)
4506 goto out;
4507
4508 bpage = *data_page;
4509 if (!bpage)
4510 goto out;
4511
4512 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4513
4514 reader = rb_get_reader_page(cpu_buffer);
4515 if (!reader)
4516 goto out_unlock;
4517
4518 event = rb_reader_event(cpu_buffer);
4519
4520 read = reader->read;
4521 commit = rb_page_commit(reader);
4522
4523
4524 missed_events = cpu_buffer->lost_events;
4525
4526
4527
4528
4529
4530
4531
4532
4533 if (read || (len < (commit - read)) ||
4534 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4535 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4536 unsigned int rpos = read;
4537 unsigned int pos = 0;
4538 unsigned int size;
4539
4540 if (full)
4541 goto out_unlock;
4542
4543 if (len > (commit - read))
4544 len = (commit - read);
4545
4546
4547 size = rb_event_ts_length(event);
4548
4549 if (len < size)
4550 goto out_unlock;
4551
4552
4553 save_timestamp = cpu_buffer->read_stamp;
4554
4555
4556 do {
4557
4558
4559
4560
4561
4562
4563 size = rb_event_length(event);
4564 memcpy(bpage->data + pos, rpage->data + rpos, size);
4565
4566 len -= size;
4567
4568 rb_advance_reader(cpu_buffer);
4569 rpos = reader->read;
4570 pos += size;
4571
4572 if (rpos >= commit)
4573 break;
4574
4575 event = rb_reader_event(cpu_buffer);
4576
4577 size = rb_event_ts_length(event);
4578 } while (len >= size);
4579
4580
4581 local_set(&bpage->commit, pos);
4582 bpage->time_stamp = save_timestamp;
4583
4584
4585 read = 0;
4586 } else {
4587
4588 cpu_buffer->read += rb_page_entries(reader);
4589 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4590
4591
4592 rb_init_page(bpage);
4593 bpage = reader->page;
4594 reader->page = *data_page;
4595 local_set(&reader->write, 0);
4596 local_set(&reader->entries, 0);
4597 reader->read = 0;
4598 *data_page = bpage;
4599
4600
4601
4602
4603
4604
4605 if (reader->real_end)
4606 local_set(&bpage->commit, reader->real_end);
4607 }
4608 ret = read;
4609
4610 cpu_buffer->lost_events = 0;
4611
4612 commit = local_read(&bpage->commit);
4613
4614
4615
4616 if (missed_events) {
4617
4618
4619
4620 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4621 memcpy(&bpage->data[commit], &missed_events,
4622 sizeof(missed_events));
4623 local_add(RB_MISSED_STORED, &bpage->commit);
4624 commit += sizeof(missed_events);
4625 }
4626 local_add(RB_MISSED_EVENTS, &bpage->commit);
4627 }
4628
4629
4630
4631
4632 if (commit < BUF_PAGE_SIZE)
4633 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4634
4635 out_unlock:
4636 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4637
4638 out:
4639 return ret;
4640}
4641EXPORT_SYMBOL_GPL(ring_buffer_read_page);
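/*
 * Example (not from this file): tying the read-page helpers together for
 * one CPU. Depending on how much was written, the page is either swapped
 * out of the ring buffer or the events are copied into it. my_buffer and
 * consume_page() are illustrative assumptions.
 *
 *	void *page = ring_buffer_alloc_read_page(my_buffer, cpu);
 *	int ret;
 *
 *	if (!page)
 *		return -ENOMEM;
 *	ret = ring_buffer_read_page(my_buffer, &page, PAGE_SIZE, cpu, 0);
 *	if (ret >= 0)
 *		consume_page(page, ret);
 *	ring_buffer_free_read_page(my_buffer, page);
 */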
4642
4643#ifdef CONFIG_HOTPLUG_CPU
4644static int rb_cpu_notify(struct notifier_block *self,
4645 unsigned long action, void *hcpu)
4646{
4647 struct ring_buffer *buffer =
4648 container_of(self, struct ring_buffer, cpu_notify);
4649 long cpu = (long)hcpu;
4650 int cpu_i, nr_pages_same;
4651 unsigned int nr_pages;
4652
4653 switch (action) {
4654 case CPU_UP_PREPARE:
4655 case CPU_UP_PREPARE_FROZEN:
4656 if (cpumask_test_cpu(cpu, buffer->cpumask))
4657 return NOTIFY_OK;
4658
4659 nr_pages = 0;
4660 nr_pages_same = 1;
4661
4662 for_each_buffer_cpu(buffer, cpu_i) {
4663
4664 if (nr_pages == 0)
4665 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4666 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4667 nr_pages_same = 0;
4668 break;
4669 }
4670 }
4671
4672 if (!nr_pages_same)
4673 nr_pages = 2;
4674 buffer->buffers[cpu] =
4675 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4676 if (!buffer->buffers[cpu]) {
4677 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4678 cpu);
4679 return NOTIFY_OK;
4680 }
4681 smp_wmb();
4682 cpumask_set_cpu(cpu, buffer->cpumask);
4683 break;
4684 case CPU_DOWN_PREPARE:
4685 case CPU_DOWN_PREPARE_FROZEN:
4686
4687
4688
4689
4690
4691 break;
4692 default:
4693 break;
4694 }
4695 return NOTIFY_OK;
4696}
4697#endif
4698
4699#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
/*
 * This is a basic integrity check of the ring buffer.
 * Late in the boot cycle this test will run when configured in.
 * It will kick off a thread per CPU that will go into a loop
 * writing to the per cpu ring buffer various sizes of data.
 * Some of the data will be large items, some small.
 *
 * Another thread is created that goes into a spin, sending out
 * IPIs to the other CPUs to also write into the ring buffer.
 * This is to test the nesting ability of the buffer.
 *
 * Basic stats are recorded and reported. If something in the
 * ring buffer should happen that's not expected, a big warning
 * is displayed and all ring buffers are disabled.
 */
4715static struct task_struct *rb_threads[NR_CPUS] __initdata;
4716
4717struct rb_test_data {
4718 struct ring_buffer *buffer;
4719 unsigned long events;
4720 unsigned long bytes_written;
4721 unsigned long bytes_alloc;
4722 unsigned long bytes_dropped;
4723 unsigned long events_nested;
4724 unsigned long bytes_written_nested;
4725 unsigned long bytes_alloc_nested;
4726 unsigned long bytes_dropped_nested;
4727 int min_size_nested;
4728 int max_size_nested;
4729 int max_size;
4730 int min_size;
4731 int cpu;
4732 int cnt;
4733};
4734
4735static struct rb_test_data rb_data[NR_CPUS] __initdata;
4736
4737
4738#define RB_TEST_BUFFER_SIZE 1048576
4739
4740static char rb_string[] __initdata =
4741 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
4742 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
4743 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
4744
4745static bool rb_test_started __initdata;
4746
4747struct rb_item {
4748 int size;
4749 char str[];
4750};
4751
4752static __init int rb_write_something(struct rb_test_data *data, bool nested)
4753{
4754 struct ring_buffer_event *event;
4755 struct rb_item *item;
4756 bool started;
4757 int event_len;
4758 int size;
4759 int len;
4760 int cnt;
4761
	/* Have nested writes different than what is written */
	cnt = data->cnt + (nested ? 27 : 0);

	/* Multiply cnt by ~e, to make some unique increment */
	size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
4767
4768 len = size + sizeof(struct rb_item);
4769
4770 started = rb_test_started;
4771
4772 smp_rmb();
4773
4774 event = ring_buffer_lock_reserve(data->buffer, len);
4775 if (!event) {
		/* Ignore dropped events before the test starts. */
		if (started) {
			if (nested)
				data->bytes_dropped_nested += len;
			else
				data->bytes_dropped += len;
		}
4783 return len;
4784 }
4785
4786 event_len = ring_buffer_event_length(event);
4787
4788 if (RB_WARN_ON(data->buffer, event_len < len))
4789 goto out;
4790
4791 item = ring_buffer_event_data(event);
4792 item->size = size;
4793 memcpy(item->str, rb_string, size);
4794
4795 if (nested) {
4796 data->bytes_alloc_nested += event_len;
4797 data->bytes_written_nested += len;
4798 data->events_nested++;
4799 if (!data->min_size_nested || len < data->min_size_nested)
4800 data->min_size_nested = len;
4801 if (len > data->max_size_nested)
4802 data->max_size_nested = len;
4803 } else {
4804 data->bytes_alloc += event_len;
4805 data->bytes_written += len;
4806 data->events++;
		if (!data->min_size || len < data->min_size)
			data->min_size = len;
4809 if (len > data->max_size)
4810 data->max_size = len;
4811 }
4812
4813 out:
4814 ring_buffer_unlock_commit(data->buffer, event);
4815
4816 return 0;
4817}
4818
4819static __init int rb_test(void *arg)
4820{
4821 struct rb_test_data *data = arg;
4822
4823 while (!kthread_should_stop()) {
4824 rb_write_something(data, false);
4825 data->cnt++;
4826
4827 set_current_state(TASK_INTERRUPTIBLE);
4828
4829 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
4830 }
4831
4832 return 0;
4833}
4834
4835static __init void rb_ipi(void *ignore)
4836{
4837 struct rb_test_data *data;
4838 int cpu = smp_processor_id();
4839
4840 data = &rb_data[cpu];
4841 rb_write_something(data, true);
4842}
4843
4844static __init int rb_hammer_test(void *arg)
4845{
4846 while (!kthread_should_stop()) {
4847
4848
4849 smp_call_function(rb_ipi, NULL, 1);
4850
4851 schedule();
4852 }
4853
4854 return 0;
4855}
4856
4857static __init int test_ringbuffer(void)
4858{
4859 struct task_struct *rb_hammer;
4860 struct ring_buffer *buffer;
4861 int cpu;
4862 int ret = 0;
4863
4864 pr_info("Running ring buffer tests...\n");
4865
4866 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
4867 if (WARN_ON(!buffer))
4868 return 0;
4869
4870
4871 ring_buffer_record_off(buffer);
4872
4873 for_each_online_cpu(cpu) {
4874 rb_data[cpu].buffer = buffer;
4875 rb_data[cpu].cpu = cpu;
4876 rb_data[cpu].cnt = cpu;
4877 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
4878 "rbtester/%d", cpu);
4879 if (WARN_ON(!rb_threads[cpu])) {
4880 pr_cont("FAILED\n");
4881 ret = -1;
4882 goto out_free;
4883 }
4884
4885 kthread_bind(rb_threads[cpu], cpu);
4886 wake_up_process(rb_threads[cpu]);
4887 }
4888
4889
4890 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
4891 if (WARN_ON(!rb_hammer)) {
4892 pr_cont("FAILED\n");
4893 ret = -1;
4894 goto out_free;
4895 }
4896
4897 ring_buffer_record_on(buffer);
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907 smp_wmb();
4908 rb_test_started = true;
4909
	/* Just run for 10 seconds */
	set_current_state(TASK_INTERRUPTIBLE);
	schedule_timeout(10 * HZ);
4913
4914 kthread_stop(rb_hammer);
4915
4916 out_free:
4917 for_each_online_cpu(cpu) {
4918 if (!rb_threads[cpu])
4919 break;
4920 kthread_stop(rb_threads[cpu]);
4921 }
4922 if (ret) {
4923 ring_buffer_free(buffer);
4924 return ret;
4925 }
4926
4927
4928 pr_info("finished\n");
4929 for_each_online_cpu(cpu) {
4930 struct ring_buffer_event *event;
4931 struct rb_test_data *data = &rb_data[cpu];
4932 struct rb_item *item;
4933 unsigned long total_events;
4934 unsigned long total_dropped;
4935 unsigned long total_written;
4936 unsigned long total_alloc;
4937 unsigned long total_read = 0;
4938 unsigned long total_size = 0;
4939 unsigned long total_len = 0;
4940 unsigned long total_lost = 0;
4941 unsigned long lost;
4942 int big_event_size;
4943 int small_event_size;
4944
4945 ret = -1;
4946
4947 total_events = data->events + data->events_nested;
4948 total_written = data->bytes_written + data->bytes_written_nested;
4949 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
4950 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
4951
4952 big_event_size = data->max_size + data->max_size_nested;
4953 small_event_size = data->min_size + data->min_size_nested;
4954
4955 pr_info("CPU %d:\n", cpu);
4956 pr_info(" events: %ld\n", total_events);
4957 pr_info(" dropped bytes: %ld\n", total_dropped);
4958 pr_info(" alloced bytes: %ld\n", total_alloc);
4959 pr_info(" written bytes: %ld\n", total_written);
4960 pr_info(" biggest event: %d\n", big_event_size);
4961 pr_info(" smallest event: %d\n", small_event_size);
4962
4963 if (RB_WARN_ON(buffer, total_dropped))
4964 break;
4965
4966 ret = 0;
4967
4968 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
4969 total_lost += lost;
4970 item = ring_buffer_event_data(event);
4971 total_len += ring_buffer_event_length(event);
4972 total_size += item->size + sizeof(struct rb_item);
4973 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
4974 pr_info("FAILED!\n");
4975 pr_info("buffer had: %.*s\n", item->size, item->str);
4976 pr_info("expected: %.*s\n", item->size, rb_string);
4977 RB_WARN_ON(buffer, 1);
4978 ret = -1;
4979 break;
4980 }
4981 total_read++;
4982 }
4983 if (ret)
4984 break;
4985
4986 ret = -1;
4987
4988 pr_info(" read events: %ld\n", total_read);
4989 pr_info(" lost events: %ld\n", total_lost);
4990 pr_info(" total events: %ld\n", total_lost + total_read);
4991 pr_info(" recorded len bytes: %ld\n", total_len);
4992 pr_info(" recorded size bytes: %ld\n", total_size);
4993 if (total_lost)
4994 pr_info(" With dropped events, record len and size may not match\n"
4995 " alloced and written from above\n");
4996 if (!total_lost) {
4997 if (RB_WARN_ON(buffer, total_len != total_alloc ||
4998 total_size != total_written))
4999 break;
5000 }
5001 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
5002 break;
5003
5004 ret = 0;
5005 }
5006 if (!ret)
5007 pr_info("Ring buffer PASSED!\n");
5008
5009 ring_buffer_free(buffer);
5010 return 0;
5011}
5012
5013late_initcall(test_ringbuffer);
5014#endif
5015