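/*
 * Generic ring buffer: per-CPU buffers used by the tracing
 * infrastructure to record events.
 */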
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/sched/clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/oom.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);
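/*
 * Print the layout of the compressed event header. This is what is
 * exported to user space (the trace "header_event" file) so that
 * tools can decode the binary buffer pages.
 */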
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len : 5 bits\n");
	trace_seq_puts(s, "\ttime_delta : 27 bits\n");
	trace_seq_puts(s, "\tarray : 32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
			 RINGBUF_TYPE_TIME_STAMP);
	trace_seq_printf(s, "\tdata max type_len == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}
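/*
 * Event header encoding (see ring_buffer_print_entry_header() above):
 * the first 32 bits of an event hold a 5-bit type_len and a 27-bit
 * time_delta.  A type_len of 1..RINGBUF_TYPE_DATA_TYPE_LEN_MAX encodes
 * the data length directly in RB_ALIGNMENT (4 byte) units, e.g.
 * type_len == 3 means 12 bytes of data follow the 4-byte header.
 * Larger events use type_len == 0 and store the length in array[0].
 * The remaining type_len values are the RINGBUF_TYPE_* specials
 * (padding, time extend, absolute time stamp).
 */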
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)

#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 8,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

#define extended_time(event) \
	(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}
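/*
 * Return the total size of the event in bytes, including the header.
 * For padding at the end of a page (a "null" event) the size is
 * undefined and -1 is returned.
 */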
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		BUG();
	}

	return 0;
}

static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (extended_time(event)) {
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}
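/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */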
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (extended_time(event))
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
	if (extended_time(event))
		event = skip_time_extend(event);
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	if (event->type_len)
		return (void *)&event->array[0];

	return (void *)&event->array[1];
}
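/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */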
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)
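/**
 * ring_buffer_event_time_stamp - return the event's extended timestamp
 * @event: the event to get the timestamp of
 *
 * Decodes the 64-bit timestamp stored in a TIME_STAMP/TIME_EXTEND
 * style event back into a plain u64:
 *
 *	ts = (event->array[0] << TS_SHIFT) + event->time_delta
 */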
u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
{
	u64 ts;

	ts = event->array[0];
	ts <<= TS_SHIFT;
	ts += event->time_delta;

	return ts;
}

#define RB_MISSED_EVENTS	(1 << 31)

#define RB_MISSED_STORED	(1 << 30)

#define RB_MISSED_FLAGS		(RB_MISSED_EVENTS|RB_MISSED_STORED)

struct buffer_data_page {
	u64		 time_stamp;
	local_t		 commit;
	unsigned char	 data[] RB_ALIGN_DATA;
};
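/*
 * Per-page bookkeeping, kept separate from the data page itself:
 *  list:	list of buffer pages
 *  write:	index for the next write (low bits; see RB_WRITE_MASK)
 *  read:	index for the next read on this page
 *  entries:	entries written on this page (low bits)
 *  real_end:	real end of the written data, set when a write wraps
 *  page:	the actual data page (struct buffer_data_page)
 */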
struct buffer_page {
	struct list_head list;
	local_t		 write;
	unsigned	 read;
	local_t		 entries;
	unsigned long	 real_end;
	struct buffer_data_page *page;
};
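/*
 * The write and entries counters of a buffer_page are split in two:
 * the lower 20 bits (RB_WRITE_MASK) hold the actual offset/count,
 * while the upper bits count writers that interrupted a page move.
 * rb_tail_page_update() adds RB_WRITE_INTCNT to detect such nested
 * writes when switching to a new tail page.
 */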
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}

size_t ring_buffer_page_len(void *page)
{
	struct buffer_data_page *bpage = page;

	return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
		+ BUF_PAGE_HDR_SIZE;
}

static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;

	trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			 "offset:0;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)sizeof(field.time_stamp),
			 (unsigned int)is_signed_type(u64));

	trace_seq_printf(s, "\tfield: local_t commit;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 (unsigned int)sizeof(field.commit),
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: int overwrite;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 1,
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: char data;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), data),
			 (unsigned int)BUF_PAGE_SIZE,
			 (unsigned int)is_signed_type(char));

	return !trace_seq_has_overflowed(s);
}

struct rb_irq_work {
	struct irq_work			work;
	wait_queue_head_t		waiters;
	wait_queue_head_t		full_waiters;
	bool				waiters_pending;
	bool				full_waiters_pending;
	bool				wakeup_full;
};

struct rb_event_info {
	u64			ts;
	u64			delta;
	unsigned long		length;
	struct buffer_page	*tail_page;
	int			add_timestamp;
};
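/*
 * Used for recursion protection: one bit per context (NMI, IRQ,
 * softirq, normal) is kept in ring_buffer_per_cpu::current_context,
 * shifted by ->nest to allow a limited amount of intended recursion
 * (see ring_buffer_nest_start()).
 */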
enum {
	RB_CTX_NMI,
	RB_CTX_IRQ,
	RB_CTX_SOFTIRQ,
	RB_CTX_NORMAL,
	RB_CTX_MAX
};

struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	struct ring_buffer		*buffer;
	raw_spinlock_t			reader_lock;
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	struct buffer_data_page		*free_page;
	unsigned long			nr_pages;
	unsigned int			current_context;
	struct list_head		*pages;
	struct buffer_page		*head_page;
	struct buffer_page		*tail_page;
	struct buffer_page		*commit_page;
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	unsigned long			nest;
	local_t				entries_bytes;
	local_t				entries;
	local_t				overrun;
	local_t				commit_overrun;
	local_t				dropped_events;
	local_t				committing;
	local_t				commits;
	unsigned long			read;
	unsigned long			read_bytes;
	u64				write_stamp;
	u64				read_stamp;

	long				nr_pages_to_update;
	struct list_head		new_pages;
	struct work_struct		update_pages_work;
	struct completion		update_done;

	struct rb_irq_work		irq_work;
};

struct ring_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

	struct hlist_node		node;
	u64				(*clock)(void);

	struct rb_irq_work		irq_work;
	bool				time_stamp_abs;
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
};
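/*
 * irq_work callback: wake up any readers waiting for data
 * (and, when requested, those waiting for a full page).
 */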
static void rb_wake_up_waiters(struct irq_work *work)
{
	struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);

	wake_up_all(&rbwork->waiters);
	if (rbwork->wakeup_full) {
		rbwork->wakeup_full = false;
		wake_up_all(&rbwork->full_waiters);
	}
}
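/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until a full page is available, only valid for a single cpu
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */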
int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
{
	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
	DEFINE_WAIT(wait);
	struct rb_irq_work *work;
	int ret = 0;

	if (cpu == RING_BUFFER_ALL_CPUS) {
		work = &buffer->irq_work;
		full = false;
	} else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -ENODEV;
		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	while (true) {
		if (full)
			prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
		else
			prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);

		if (full)
			work->full_waiters_pending = true;
		else
			work->waiters_pending = true;

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
			break;

		if (cpu != RING_BUFFER_ALL_CPUS &&
		    !ring_buffer_empty_cpu(buffer, cpu)) {
			unsigned long flags;
			bool pagebusy;

			if (!full)
				break;

			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);

			if (!pagebusy)
				break;
		}

		schedule();
	}

	if (full)
		finish_wait(&work->full_waiters, &wait);
	else
		finish_wait(&work->waiters, &wait);

	return ret;
}
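/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: the poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */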
__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
			       struct file *filp, poll_table *poll_table)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct rb_irq_work *work;

	if (cpu == RING_BUFFER_ALL_CPUS)
		work = &buffer->irq_work;
	else {
		if (!cpumask_test_cpu(cpu, buffer->cpumask))
			return -EINVAL;

		cpu_buffer = buffer->buffers[cpu];
		work = &cpu_buffer->irq_work;
	}

	poll_wait(filp, &work->waiters, poll_table);
	work->waiters_pending = true;

	smp_mb();

	if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
	    (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
		return EPOLLIN | EPOLLRDNORM;
	return 0;
}
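/*
 * RB_WARN_ON() can be handed either a ring_buffer or a
 * ring_buffer_per_cpu; when the condition triggers it disables
 * recording on the affected buffer and fires a WARN_ON(1).
 */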
#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})

#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct ring_buffer *buffer)
{
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_no_resched_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
				      int cpu, u64 *ts)
{
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
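/*
 * Lockless head page tracking: the two least significant bits of a
 * buffer page's predecessor ->next pointer are used as flags.  A page
 * whose predecessor's next pointer carries RB_PAGE_HEAD is the head
 * page (the next page to read); RB_PAGE_UPDATE marks a head page that
 * a writer is currently moving.  The flags are changed with cmpxchg()
 * so writers (which may nest via interrupts) and the reader can race
 * safely.  RB_PAGE_MOVED is only a return value, signalling that the
 * reader already swapped the page out.
 */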
#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL

#define RB_FLAG_MASK		3UL

#define RB_PAGE_MOVED		4UL

static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}

static inline int
rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
		struct buffer_page *page, struct list_head *list)
{
	unsigned long val;

	val = (unsigned long)list->next;

	if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
		return RB_PAGE_MOVED;

	return val & RB_FLAG_MASK;
}

static bool rb_is_reader_page(struct buffer_page *page)
{
	struct list_head *list = page->list.prev;

	return rb_list_head(list->next) != &page->list;
}

static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
				struct list_head *list)
{
	unsigned long *ptr;

	ptr = (unsigned long *)&list->next;
	*ptr |= RB_PAGE_HEAD;
	*ptr &= ~RB_PAGE_UPDATE;
}

static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct buffer_page *head;

	head = cpu_buffer->head_page;
	if (!head)
		return;

	rb_set_list_to_head(cpu_buffer, head->list.prev);
}

static void rb_list_head_clear(struct list_head *list)
{
	unsigned long *ptr = (unsigned long *)&list->next;

	*ptr &= ~RB_FLAG_MASK;
}
static void
rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *hd;

	rb_list_head_clear(cpu_buffer->pages);

	list_for_each(hd, cpu_buffer->pages)
		rb_list_head_clear(hd);
}

static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
			    struct buffer_page *head,
			    struct buffer_page *prev,
			    int old_flag, int new_flag)
{
	struct list_head *list;
	unsigned long val = (unsigned long)&head->list;
	unsigned long ret;

	list = &prev->list;

	val &= ~RB_FLAG_MASK;

	ret = cmpxchg((unsigned long *)&list->next,
		      val | old_flag, val | new_flag);

	if ((ret & ~RB_FLAG_MASK) != val)
		return RB_PAGE_MOVED;

	return ret & RB_FLAG_MASK;
}

static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
				   struct buffer_page *head,
				   struct buffer_page *prev,
				   int old_flag)
{
	return rb_head_page_set(cpu_buffer, head, prev,
				old_flag, RB_PAGE_UPDATE);
}

static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
				 struct buffer_page *head,
				 struct buffer_page *prev,
				 int old_flag)
{
	return rb_head_page_set(cpu_buffer, head, prev,
				old_flag, RB_PAGE_HEAD);
}

static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
				   struct buffer_page *head,
				   struct buffer_page *prev,
				   int old_flag)
{
	return rb_head_page_set(cpu_buffer, head, prev,
				old_flag, RB_PAGE_NORMAL);
}

static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
			       struct buffer_page **bpage)
{
	struct list_head *p = rb_list_head((*bpage)->list.next);

	*bpage = list_entry(p, struct buffer_page, list);
}

static struct buffer_page *
rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct buffer_page *head;
	struct buffer_page *page;
	struct list_head *list;
	int i;

	if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
		return NULL;

	list = cpu_buffer->pages;
	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
		return NULL;

	page = head = cpu_buffer->head_page;

	for (i = 0; i < 3; i++) {
		do {
			if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
				cpu_buffer->head_page = page;
				return page;
			}
			rb_inc_page(cpu_buffer, &page);
		} while (page != head);
	}

	RB_WARN_ON(cpu_buffer, 1);

	return NULL;
}

static int rb_head_page_replace(struct buffer_page *old,
				struct buffer_page *new)
{
	unsigned long *ptr = (unsigned long *)&old->list.prev->next;
	unsigned long val;
	unsigned long ret;

	val = *ptr & ~RB_FLAG_MASK;
	val |= RB_PAGE_HEAD;

	ret = cmpxchg(ptr, val, (unsigned long)&new->list);

	return ret == val;
}
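/*
 * rb_tail_page_update - move the tail page forward.
 *
 * Writers that were interrupted in the middle of pushing the tail may
 * race here.  Adding RB_WRITE_INTCNT to the next page's write/entries
 * counters tags this attempt; only if no interrupting writer touched
 * the counters in the meantime are the low bits reset to zero for the
 * new page, and only the writer that still sees the old tail page
 * swings cpu_buffer->tail_page forward (via cmpxchg).
 */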
static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
				struct buffer_page *tail_page,
				struct buffer_page *next_page)
{
	unsigned long old_entries;
	unsigned long old_write;

	old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
	old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);

	barrier();

	if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
		unsigned long val = old_write & ~RB_WRITE_MASK;
		unsigned long eval = old_entries & ~RB_WRITE_MASK;

		(void)local_cmpxchg(&next_page->write, old_write, val);
		(void)local_cmpxchg(&next_page->entries, old_entries, eval);

		local_set(&next_page->page->commit, 0);

		(void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
	}
}

static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
			  struct buffer_page *bpage)
{
	unsigned long val = (unsigned long)bpage;

	if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
		return 1;

	return 0;
}

static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
			 struct list_head *list)
{
	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
		return 1;
	if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
		return 1;
	return 0;
}
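/*
 * rb_check_pages - integrity check of buffer pages
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted: the links of the circular list must be consistent
 * in both directions.
 */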
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *head = cpu_buffer->pages;
	struct buffer_page *bpage, *tmp;

	if (cpu_buffer->head_page)
		rb_set_head_page(cpu_buffer);

	rb_head_page_deactivate(cpu_buffer);

	if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
		return -1;
	if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
		return -1;

	if (rb_check_list(cpu_buffer, head))
		return -1;

	list_for_each_entry_safe(bpage, tmp, head, list) {
		if (RB_WARN_ON(cpu_buffer,
			       bpage->list.next->prev != &bpage->list))
			return -1;
		if (RB_WARN_ON(cpu_buffer,
			       bpage->list.prev->next != &bpage->list))
			return -1;
		if (rb_check_list(cpu_buffer, &bpage->list))
			return -1;
	}

	rb_head_page_activate(cpu_buffer);

	return 0;
}

static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
{
	struct buffer_page *bpage, *tmp;
	bool user_thread = current->mm != NULL;
	gfp_t mflags;
	long i;

	i = si_mem_available();
	if (i < nr_pages)
		return -ENOMEM;

	mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;

	if (user_thread)
		set_current_oom_origin();
	for (i = 0; i < nr_pages; i++) {
		struct page *page;

		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
				     mflags, cpu_to_node(cpu));
		if (!bpage)
			goto free_pages;

		list_add(&bpage->list, pages);

		page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
		if (!page)
			goto free_pages;
		bpage->page = page_address(page);
		rb_init_page(bpage->page);

		if (user_thread && fatal_signal_pending(current))
			goto free_pages;
	}
	if (user_thread)
		clear_current_oom_origin();

	return 0;

free_pages:
	list_for_each_entry_safe(bpage, tmp, pages, list) {
		list_del_init(&bpage->list);
		free_buffer_page(bpage);
	}
	if (user_thread)
		clear_current_oom_origin();

	return -ENOMEM;
}
static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
			     unsigned long nr_pages)
{
	LIST_HEAD(pages);

	WARN_ON(!nr_pages);

	if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
		return -ENOMEM;

	cpu_buffer->pages = pages.next;
	list_del(&pages);

	cpu_buffer->nr_pages = nr_pages;

	rb_check_pages(cpu_buffer);

	return 0;
}

static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	struct buffer_page *bpage;
	struct page *page;
	int ret;

	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
				  GFP_KERNEL, cpu_to_node(cpu));
	if (!cpu_buffer)
		return NULL;

	cpu_buffer->cpu = cpu;
	cpu_buffer->buffer = buffer;
	raw_spin_lock_init(&cpu_buffer->reader_lock);
	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
	INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
	init_completion(&cpu_buffer->update_done);
	init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
	init_waitqueue_head(&cpu_buffer->irq_work.waiters);
	init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);

	bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
			     GFP_KERNEL, cpu_to_node(cpu));
	if (!bpage)
		goto fail_free_buffer;

	rb_check_bpage(cpu_buffer, bpage);

	cpu_buffer->reader_page = bpage;
	page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
	if (!page)
		goto fail_free_reader;
	bpage->page = page_address(page);
	rb_init_page(bpage->page);

	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
	INIT_LIST_HEAD(&cpu_buffer->new_pages);

	ret = rb_allocate_pages(cpu_buffer, nr_pages);
	if (ret < 0)
		goto fail_free_reader;

	cpu_buffer->head_page
		= list_entry(cpu_buffer->pages, struct buffer_page, list);
	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;

	rb_head_page_activate(cpu_buffer);

	return cpu_buffer;

 fail_free_reader:
	free_buffer_page(cpu_buffer->reader_page);

 fail_free_buffer:
	kfree(cpu_buffer);
	return NULL;
}

static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct list_head *head = cpu_buffer->pages;
	struct buffer_page *bpage, *tmp;

	free_buffer_page(cpu_buffer->reader_page);

	rb_head_page_deactivate(cpu_buffer);

	if (head) {
		list_for_each_entry_safe(bpage, tmp, head, list) {
			list_del_init(&bpage->list);
			free_buffer_page(bpage);
		}
		bpage = list_entry(head, struct buffer_page, list);
		free_buffer_page(bpage);
	}

	kfree(cpu_buffer);
}
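/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 *
 * A minimal usage sketch (illustrative only; callers normally go
 * through the ring_buffer_alloc() wrapper macro, and error handling
 * is omitted):
 *
 *	struct ring_buffer *rb = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
 *	struct ring_buffer_event *ev;
 *
 *	ev = ring_buffer_lock_reserve(rb, sizeof(int));
 *	*(int *)ring_buffer_event_data(ev) = 42;
 *	ring_buffer_unlock_commit(rb, ev);
 *	...
 *	ring_buffer_free(rb);
 */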
struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
					struct lock_class_key *key)
{
	struct ring_buffer *buffer;
	long nr_pages;
	int bsize;
	int cpu;
	int ret;

	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
			 GFP_KERNEL);
	if (!buffer)
		return NULL;

	if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
		goto fail_free_buffer;

	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
	buffer->flags = flags;
	buffer->clock = trace_clock_local;
	buffer->reader_lock_key = key;

	init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
	init_waitqueue_head(&buffer->irq_work.waiters);

	if (nr_pages < 2)
		nr_pages = 2;

	buffer->cpus = nr_cpu_ids;

	bsize = sizeof(void *) * nr_cpu_ids;
	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
				  GFP_KERNEL);
	if (!buffer->buffers)
		goto fail_free_cpumask;

	cpu = raw_smp_processor_id();
	cpumask_set_cpu(cpu, buffer->cpumask);
	buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
	if (!buffer->buffers[cpu])
		goto fail_free_buffers;

	ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
	if (ret < 0)
		goto fail_free_buffers;

	mutex_init(&buffer->mutex);

	return buffer;

 fail_free_buffers:
	for_each_buffer_cpu(buffer, cpu) {
		if (buffer->buffers[cpu])
			rb_free_cpu_buffer(buffer->buffers[cpu]);
	}
	kfree(buffer->buffers);

 fail_free_cpumask:
	free_cpumask_var(buffer->cpumask);

 fail_free_buffer:
	kfree(buffer);
	return NULL;
}
EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
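/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */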
void
ring_buffer_free(struct ring_buffer *buffer)
{
	int cpu;

	cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);

	for_each_buffer_cpu(buffer, cpu)
		rb_free_cpu_buffer(buffer->buffers[cpu]);

	kfree(buffer->buffers);
	free_cpumask_var(buffer->cpumask);

	kfree(buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_free);

void ring_buffer_set_clock(struct ring_buffer *buffer,
			   u64 (*clock)(void))
{
	buffer->clock = clock;
}

void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
{
	buffer->time_stamp_abs = abs;
}

bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
{
	return buffer->time_stamp_abs;
}
1457static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1458
1459static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1460{
1461 return local_read(&bpage->entries) & RB_WRITE_MASK;
1462}
1463
1464static inline unsigned long rb_page_write(struct buffer_page *bpage)
1465{
1466 return local_read(&bpage->write) & RB_WRITE_MASK;
1467}
1468
1469static int
1470rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
1471{
1472 struct list_head *tail_page, *to_remove, *next_page;
1473 struct buffer_page *to_remove_page, *tmp_iter_page;
1474 struct buffer_page *last_page, *first_page;
1475 unsigned long nr_removed;
1476 unsigned long head_bit;
1477 int page_entries;
1478
1479 head_bit = 0;
1480
1481 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1482 atomic_inc(&cpu_buffer->record_disabled);
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492 tail_page = &cpu_buffer->tail_page->list;
1493
1494
1495
1496
1497
1498 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1499 tail_page = rb_list_head(tail_page->next);
1500 to_remove = tail_page;
1501
1502
1503 first_page = list_entry(rb_list_head(to_remove->next),
1504 struct buffer_page, list);
1505
1506 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1507 to_remove = rb_list_head(to_remove)->next;
1508 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1509 }
1510
1511 next_page = rb_list_head(to_remove)->next;
1512
1513
1514
1515
1516
1517
1518 tail_page->next = (struct list_head *)((unsigned long)next_page |
1519 head_bit);
1520 next_page = rb_list_head(next_page);
1521 next_page->prev = tail_page;
1522
1523
1524 cpu_buffer->pages = next_page;
1525
1526
1527 if (head_bit)
1528 cpu_buffer->head_page = list_entry(next_page,
1529 struct buffer_page, list);
1530
1531
1532
1533
1534
1535 cpu_buffer->read = 0;
1536
1537
1538 atomic_dec(&cpu_buffer->record_disabled);
1539 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1540
1541 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1542
1543
1544 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1545 list);
1546 tmp_iter_page = first_page;
1547
1548 do {
1549 cond_resched();
1550
1551 to_remove_page = tmp_iter_page;
1552 rb_inc_page(cpu_buffer, &tmp_iter_page);
1553
1554
1555 page_entries = rb_page_entries(to_remove_page);
1556 if (page_entries) {
1557
1558
1559
1560
1561
1562
1563 local_add(page_entries, &cpu_buffer->overrun);
1564 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1565 }
1566
1567
1568
1569
1570
1571 free_buffer_page(to_remove_page);
1572 nr_removed--;
1573
1574 } while (to_remove_page != last_page);
1575
1576 RB_WARN_ON(cpu_buffer, nr_removed);
1577
1578 return nr_removed == 0;
1579}
1580
1581static int
1582rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1583{
1584 struct list_head *pages = &cpu_buffer->new_pages;
1585 int retries, success;
1586
1587 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602 retries = 10;
1603 success = 0;
1604 while (retries--) {
1605 struct list_head *head_page, *prev_page, *r;
1606 struct list_head *last_page, *first_page;
1607 struct list_head *head_page_with_bit;
1608
1609 head_page = &rb_set_head_page(cpu_buffer)->list;
1610 if (!head_page)
1611 break;
1612 prev_page = head_page->prev;
1613
1614 first_page = pages->next;
1615 last_page = pages->prev;
1616
1617 head_page_with_bit = (struct list_head *)
1618 ((unsigned long)head_page | RB_PAGE_HEAD);
1619
1620 last_page->next = head_page_with_bit;
1621 first_page->prev = prev_page;
1622
1623 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1624
1625 if (r == head_page_with_bit) {
1626
1627
1628
1629
1630
1631 head_page->prev = last_page;
1632 success = 1;
1633 break;
1634 }
1635 }
1636
1637 if (success)
1638 INIT_LIST_HEAD(pages);
1639
1640
1641
1642
1643 RB_WARN_ON(cpu_buffer, !success);
1644 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1645
1646
1647 if (!success) {
1648 struct buffer_page *bpage, *tmp;
1649 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1650 list) {
1651 list_del_init(&bpage->list);
1652 free_buffer_page(bpage);
1653 }
1654 }
1655 return success;
1656}
1657
1658static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1659{
1660 int success;
1661
1662 if (cpu_buffer->nr_pages_to_update > 0)
1663 success = rb_insert_pages(cpu_buffer);
1664 else
1665 success = rb_remove_pages(cpu_buffer,
1666 -cpu_buffer->nr_pages_to_update);
1667
1668 if (success)
1669 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1670}
1671
1672static void update_pages_handler(struct work_struct *work)
1673{
1674 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1675 struct ring_buffer_per_cpu, update_pages_work);
1676 rb_update_pages(cpu_buffer);
1677 complete(&cpu_buffer->update_done);
1678}
1679
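/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns the (page rounded) size on success and a negative errno
 * on failure.
 */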
1690int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1691 int cpu_id)
1692{
1693 struct ring_buffer_per_cpu *cpu_buffer;
1694 unsigned long nr_pages;
1695 int cpu, err = 0;
1696
1697
1698
1699
1700 if (!buffer)
1701 return size;
1702
1703
1704 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1705 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1706 return size;
1707
1708 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1709
1710
1711 if (nr_pages < 2)
1712 nr_pages = 2;
1713
1714 size = nr_pages * BUF_PAGE_SIZE;
1715
1716
1717
1718
1719
1720
1721 if (atomic_read(&buffer->resize_disabled))
1722 return -EBUSY;
1723
1724
1725 mutex_lock(&buffer->mutex);
1726
1727 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1728
1729 for_each_buffer_cpu(buffer, cpu) {
1730 cpu_buffer = buffer->buffers[cpu];
1731
1732 cpu_buffer->nr_pages_to_update = nr_pages -
1733 cpu_buffer->nr_pages;
1734
1735
1736
1737 if (cpu_buffer->nr_pages_to_update <= 0)
1738 continue;
1739
1740
1741
1742
1743 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1744 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1745 &cpu_buffer->new_pages, cpu)) {
1746
1747 err = -ENOMEM;
1748 goto out_err;
1749 }
1750 }
1751
1752 get_online_cpus();
1753
1754
1755
1756
1757
1758 for_each_buffer_cpu(buffer, cpu) {
1759 cpu_buffer = buffer->buffers[cpu];
1760 if (!cpu_buffer->nr_pages_to_update)
1761 continue;
1762
1763
1764 if (!cpu_online(cpu)) {
1765 rb_update_pages(cpu_buffer);
1766 cpu_buffer->nr_pages_to_update = 0;
1767 } else {
1768 schedule_work_on(cpu,
1769 &cpu_buffer->update_pages_work);
1770 }
1771 }
1772
1773
1774 for_each_buffer_cpu(buffer, cpu) {
1775 cpu_buffer = buffer->buffers[cpu];
1776 if (!cpu_buffer->nr_pages_to_update)
1777 continue;
1778
1779 if (cpu_online(cpu))
1780 wait_for_completion(&cpu_buffer->update_done);
1781 cpu_buffer->nr_pages_to_update = 0;
1782 }
1783
1784 put_online_cpus();
1785 } else {
1786
1787 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1788 goto out;
1789
1790 cpu_buffer = buffer->buffers[cpu_id];
1791
1792 if (nr_pages == cpu_buffer->nr_pages)
1793 goto out;
1794
1795 cpu_buffer->nr_pages_to_update = nr_pages -
1796 cpu_buffer->nr_pages;
1797
1798 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1799 if (cpu_buffer->nr_pages_to_update > 0 &&
1800 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1801 &cpu_buffer->new_pages, cpu_id)) {
1802 err = -ENOMEM;
1803 goto out_err;
1804 }
1805
1806 get_online_cpus();
1807
1808
1809 if (!cpu_online(cpu_id))
1810 rb_update_pages(cpu_buffer);
1811 else {
1812 schedule_work_on(cpu_id,
1813 &cpu_buffer->update_pages_work);
1814 wait_for_completion(&cpu_buffer->update_done);
1815 }
1816
1817 cpu_buffer->nr_pages_to_update = 0;
1818 put_online_cpus();
1819 }
1820
1821 out:
1822
1823
1824
1825
1826
1827
1828
1829 if (atomic_read(&buffer->record_disabled)) {
1830 atomic_inc(&buffer->record_disabled);
1831
1832
1833
1834
1835
1836
1837 synchronize_sched();
1838 for_each_buffer_cpu(buffer, cpu) {
1839 cpu_buffer = buffer->buffers[cpu];
1840 rb_check_pages(cpu_buffer);
1841 }
1842 atomic_dec(&buffer->record_disabled);
1843 }
1844
1845 mutex_unlock(&buffer->mutex);
1846 return size;
1847
1848 out_err:
1849 for_each_buffer_cpu(buffer, cpu) {
1850 struct buffer_page *bpage, *tmp;
1851
1852 cpu_buffer = buffer->buffers[cpu];
1853 cpu_buffer->nr_pages_to_update = 0;
1854
1855 if (list_empty(&cpu_buffer->new_pages))
1856 continue;
1857
1858 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1859 list) {
1860 list_del_init(&bpage->list);
1861 free_buffer_page(bpage);
1862 }
1863 }
1864 mutex_unlock(&buffer->mutex);
1865 return err;
1866}
1867EXPORT_SYMBOL_GPL(ring_buffer_resize);
1868
1869void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1870{
1871 mutex_lock(&buffer->mutex);
1872 if (val)
1873 buffer->flags |= RB_FL_OVERWRITE;
1874 else
1875 buffer->flags &= ~RB_FL_OVERWRITE;
1876 mutex_unlock(&buffer->mutex);
1877}
1878EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1879
1880static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1881{
1882 return bpage->page->data + index;
1883}
1884
1885static __always_inline struct ring_buffer_event *
1886rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1887{
1888 return __rb_page_index(cpu_buffer->reader_page,
1889 cpu_buffer->reader_page->read);
1890}
1891
1892static __always_inline struct ring_buffer_event *
1893rb_iter_head_event(struct ring_buffer_iter *iter)
1894{
1895 return __rb_page_index(iter->head_page, iter->head);
1896}
1897
1898static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
1899{
1900 return local_read(&bpage->page->commit);
1901}
1902
1903
1904static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
1905{
1906 return rb_page_commit(bpage);
1907}
1908
1909static __always_inline unsigned
1910rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1911{
1912 return rb_page_commit(cpu_buffer->commit_page);
1913}
1914
1915static __always_inline unsigned
1916rb_event_index(struct ring_buffer_event *event)
1917{
1918 unsigned long addr = (unsigned long)event;
1919
1920 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1921}
1922
1923static void rb_inc_iter(struct ring_buffer_iter *iter)
1924{
1925 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1926
1927
1928
1929
1930
1931
1932
1933 if (iter->head_page == cpu_buffer->reader_page)
1934 iter->head_page = rb_set_head_page(cpu_buffer);
1935 else
1936 rb_inc_page(cpu_buffer, &iter->head_page);
1937
1938 iter->read_stamp = iter->head_page->page->time_stamp;
1939 iter->head = 0;
1940}
1941
1942
1943
1944
1945
1946
1947
1948
1949static int
1950rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1951 struct buffer_page *tail_page,
1952 struct buffer_page *next_page)
1953{
1954 struct buffer_page *new_head;
1955 int entries;
1956 int type;
1957 int ret;
1958
1959 entries = rb_page_entries(next_page);
1960
1961
1962
1963
1964
1965
1966 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1967 RB_PAGE_HEAD);
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980 switch (type) {
1981 case RB_PAGE_HEAD:
1982
1983
1984
1985
1986
1987 local_add(entries, &cpu_buffer->overrun);
1988 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1989
1990
1991
1992
1993
1994
1995
1996 break;
1997
1998 case RB_PAGE_UPDATE:
1999
2000
2001
2002
2003 break;
2004 case RB_PAGE_NORMAL:
2005
2006
2007
2008
2009
2010 return 1;
2011 case RB_PAGE_MOVED:
2012
2013
2014
2015
2016
2017 return 1;
2018 default:
2019 RB_WARN_ON(cpu_buffer, 1);
2020 return -1;
2021 }
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037 new_head = next_page;
2038 rb_inc_page(cpu_buffer, &new_head);
2039
2040 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
2041 RB_PAGE_NORMAL);
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051 switch (ret) {
2052 case RB_PAGE_HEAD:
2053 case RB_PAGE_NORMAL:
2054
2055 break;
2056 default:
2057 RB_WARN_ON(cpu_buffer, 1);
2058 return -1;
2059 }
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071 if (ret == RB_PAGE_NORMAL) {
2072 struct buffer_page *buffer_tail_page;
2073
2074 buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
2075
2076
2077
2078
2079 if (buffer_tail_page != tail_page &&
2080 buffer_tail_page != next_page)
2081 rb_head_page_set_normal(cpu_buffer, new_head,
2082 next_page,
2083 RB_PAGE_HEAD);
2084 }
2085
2086
2087
2088
2089
2090
2091 if (type == RB_PAGE_HEAD) {
2092 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2093 tail_page,
2094 RB_PAGE_UPDATE);
2095 if (RB_WARN_ON(cpu_buffer,
2096 ret != RB_PAGE_UPDATE))
2097 return -1;
2098 }
2099
2100 return 0;
2101}
2102
2103static inline void
2104rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2105 unsigned long tail, struct rb_event_info *info)
2106{
2107 struct buffer_page *tail_page = info->tail_page;
2108 struct ring_buffer_event *event;
2109 unsigned long length = info->length;
2110
2111
2112
2113
2114
2115 if (tail >= BUF_PAGE_SIZE) {
2116
2117
2118
2119
2120
2121 if (tail == BUF_PAGE_SIZE)
2122 tail_page->real_end = 0;
2123
2124 local_sub(length, &tail_page->write);
2125 return;
2126 }
2127
2128 event = __rb_page_index(tail_page, tail);
2129
2130
2131 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2132
2133
2134
2135
2136
2137
2138 tail_page->real_end = tail;
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2152
2153
2154
2155 rb_event_set_padding(event);
2156
2157
2158 local_sub(length, &tail_page->write);
2159 return;
2160 }
2161
2162
2163 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2164 event->type_len = RINGBUF_TYPE_PADDING;
2165
2166 event->time_delta = 1;
2167
2168
2169 length = (tail + length) - BUF_PAGE_SIZE;
2170 local_sub(length, &tail_page->write);
2171}
2172
2173static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
2174
2175
2176
2177
2178static noinline struct ring_buffer_event *
2179rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2180 unsigned long tail, struct rb_event_info *info)
2181{
2182 struct buffer_page *tail_page = info->tail_page;
2183 struct buffer_page *commit_page = cpu_buffer->commit_page;
2184 struct ring_buffer *buffer = cpu_buffer->buffer;
2185 struct buffer_page *next_page;
2186 int ret;
2187
2188 next_page = tail_page;
2189
2190 rb_inc_page(cpu_buffer, &next_page);
2191
2192
2193
2194
2195
2196
2197 if (unlikely(next_page == commit_page)) {
2198 local_inc(&cpu_buffer->commit_overrun);
2199 goto out_reset;
2200 }
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2217
2218
2219
2220
2221
2222 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2223
2224
2225
2226
2227 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2228 local_inc(&cpu_buffer->dropped_events);
2229 goto out_reset;
2230 }
2231
2232 ret = rb_handle_head_page(cpu_buffer,
2233 tail_page,
2234 next_page);
2235 if (ret < 0)
2236 goto out_reset;
2237 if (ret)
2238 goto out_again;
2239 } else {
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250 if (unlikely((cpu_buffer->commit_page !=
2251 cpu_buffer->tail_page) &&
2252 (cpu_buffer->commit_page ==
2253 cpu_buffer->reader_page))) {
2254 local_inc(&cpu_buffer->commit_overrun);
2255 goto out_reset;
2256 }
2257 }
2258 }
2259
2260 rb_tail_page_update(cpu_buffer, tail_page, next_page);
2261
2262 out_again:
2263
2264 rb_reset_tail(cpu_buffer, tail, info);
2265
2266
2267 rb_end_commit(cpu_buffer);
2268
2269 local_inc(&cpu_buffer->committing);
2270
2271
2272 return ERR_PTR(-EAGAIN);
2273
2274 out_reset:
2275
2276 rb_reset_tail(cpu_buffer, tail, info);
2277
2278 return NULL;
2279}
2280
2281
2282static noinline struct ring_buffer_event *
2283rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
2284{
2285 if (abs)
2286 event->type_len = RINGBUF_TYPE_TIME_STAMP;
2287 else
2288 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2289
2290
2291 if (abs || rb_event_index(event)) {
2292 event->time_delta = delta & TS_MASK;
2293 event->array[0] = delta >> TS_SHIFT;
2294 } else {
2295
2296 event->time_delta = 0;
2297 event->array[0] = 0;
2298 }
2299
2300 return skip_time_extend(event);
2301}
2302
2303static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2304 struct ring_buffer_event *event);
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317static void
2318rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2319 struct ring_buffer_event *event,
2320 struct rb_event_info *info)
2321{
2322 unsigned length = info->length;
2323 u64 delta = info->delta;
2324
2325
2326 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2327 delta = 0;
2328
2329
2330
2331
2332
2333 if (unlikely(info->add_timestamp)) {
2334 bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
2335
2336 event = rb_add_time_stamp(event, info->delta, abs);
2337 length -= RB_LEN_TIME_EXTEND;
2338 delta = 0;
2339 }
2340
2341 event->time_delta = delta;
2342 length -= RB_EVNT_HDR_SIZE;
2343 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2344 event->type_len = 0;
2345 event->array[0] = length;
2346 } else
2347 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2348}
2349
2350static unsigned rb_calculate_event_length(unsigned length)
2351{
2352 struct ring_buffer_event event;
2353
2354
2355 if (!length)
2356 length++;
2357
2358 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2359 length += sizeof(event.array[0]);
2360
2361 length += RB_EVNT_HDR_SIZE;
2362 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2377 length += RB_ALIGNMENT;
2378
2379 return length;
2380}
2381
2382#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2383static inline bool sched_clock_stable(void)
2384{
2385 return true;
2386}
2387#endif
2388
2389static inline int
2390rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2391 struct ring_buffer_event *event)
2392{
2393 unsigned long new_index, old_index;
2394 struct buffer_page *bpage;
2395 unsigned long index;
2396 unsigned long addr;
2397
2398 new_index = rb_event_index(event);
2399 old_index = new_index + rb_event_ts_length(event);
2400 addr = (unsigned long)event;
2401 addr &= PAGE_MASK;
2402
2403 bpage = READ_ONCE(cpu_buffer->tail_page);
2404
2405 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2406 unsigned long write_mask =
2407 local_read(&bpage->write) & ~RB_WRITE_MASK;
2408 unsigned long event_length = rb_event_length(event);
2409
2410
2411
2412
2413
2414
2415 old_index += write_mask;
2416 new_index += write_mask;
2417 index = local_cmpxchg(&bpage->write, old_index, new_index);
2418 if (index == old_index) {
2419
2420 local_sub(event_length, &cpu_buffer->entries_bytes);
2421 return 1;
2422 }
2423 }
2424
2425
2426 return 0;
2427}
2428
2429static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2430{
2431 local_inc(&cpu_buffer->committing);
2432 local_inc(&cpu_buffer->commits);
2433}
2434
2435static __always_inline void
2436rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2437{
2438 unsigned long max_count;
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448 again:
2449 max_count = cpu_buffer->nr_pages * 100;
2450
2451 while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2452 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2453 return;
2454 if (RB_WARN_ON(cpu_buffer,
2455 rb_is_reader_page(cpu_buffer->tail_page)))
2456 return;
2457 local_set(&cpu_buffer->commit_page->page->commit,
2458 rb_page_write(cpu_buffer->commit_page));
2459 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
2460
2461 if (rb_page_write(cpu_buffer->commit_page))
2462 cpu_buffer->write_stamp =
2463 cpu_buffer->commit_page->page->time_stamp;
2464
2465 barrier();
2466 }
2467 while (rb_commit_index(cpu_buffer) !=
2468 rb_page_write(cpu_buffer->commit_page)) {
2469
2470 local_set(&cpu_buffer->commit_page->page->commit,
2471 rb_page_write(cpu_buffer->commit_page));
2472 RB_WARN_ON(cpu_buffer,
2473 local_read(&cpu_buffer->commit_page->page->commit) &
2474 ~RB_WRITE_MASK);
2475 barrier();
2476 }
2477
2478
2479 barrier();
2480
2481
2482
2483
2484
2485
2486 if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
2487 goto again;
2488}
2489
2490static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2491{
2492 unsigned long commits;
2493
2494 if (RB_WARN_ON(cpu_buffer,
2495 !local_read(&cpu_buffer->committing)))
2496 return;
2497
2498 again:
2499 commits = local_read(&cpu_buffer->commits);
2500
2501 barrier();
2502 if (local_read(&cpu_buffer->committing) == 1)
2503 rb_set_commit_to_write(cpu_buffer);
2504
2505 local_dec(&cpu_buffer->committing);
2506
2507
2508 barrier();
2509
2510
2511
2512
2513
2514
2515 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2516 !local_read(&cpu_buffer->committing)) {
2517 local_inc(&cpu_buffer->committing);
2518 goto again;
2519 }
2520}
2521
2522static inline void rb_event_discard(struct ring_buffer_event *event)
2523{
2524 if (extended_time(event))
2525 event = skip_time_extend(event);
2526
2527
2528 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2529 event->type_len = RINGBUF_TYPE_PADDING;
2530
2531 if (!event->time_delta)
2532 event->time_delta = 1;
2533}
2534
2535static __always_inline bool
2536rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2537 struct ring_buffer_event *event)
2538{
2539 unsigned long addr = (unsigned long)event;
2540 unsigned long index;
2541
2542 index = rb_event_index(event);
2543 addr &= PAGE_MASK;
2544
2545 return cpu_buffer->commit_page->page == (void *)addr &&
2546 rb_commit_index(cpu_buffer) == index;
2547}
2548
2549static __always_inline void
2550rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2551 struct ring_buffer_event *event)
2552{
2553 u64 delta;
2554
2555
2556
2557
2558
2559 if (rb_event_is_commit(cpu_buffer, event)) {
2560
2561
2562
2563
2564 if (!rb_event_index(event))
2565 cpu_buffer->write_stamp =
2566 cpu_buffer->commit_page->page->time_stamp;
2567 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2568 delta = ring_buffer_event_time_stamp(event);
2569 cpu_buffer->write_stamp += delta;
2570 } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
2571 delta = ring_buffer_event_time_stamp(event);
2572 cpu_buffer->write_stamp = delta;
2573 } else
2574 cpu_buffer->write_stamp += event->time_delta;
2575 }
2576}
2577
2578static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2579 struct ring_buffer_event *event)
2580{
2581 local_inc(&cpu_buffer->entries);
2582 rb_update_write_stamp(cpu_buffer, event);
2583 rb_end_commit(cpu_buffer);
2584}
2585
2586static __always_inline void
2587rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2588{
2589 bool pagebusy;
2590
2591 if (buffer->irq_work.waiters_pending) {
2592 buffer->irq_work.waiters_pending = false;
2593
2594 irq_work_queue(&buffer->irq_work.work);
2595 }
2596
2597 if (cpu_buffer->irq_work.waiters_pending) {
2598 cpu_buffer->irq_work.waiters_pending = false;
2599
2600 irq_work_queue(&cpu_buffer->irq_work.work);
2601 }
2602
2603 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
2604
2605 if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
2606 cpu_buffer->irq_work.wakeup_full = true;
2607 cpu_buffer->irq_work.full_waiters_pending = false;
2608
2609 irq_work_queue(&cpu_buffer->irq_work.work);
2610 }
2611}
2612
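/*
 * Recursion protection for the writers.  A bit in current_context is
 * set for the context we are recording in (NMI, IRQ, softirq or
 * normal).  If an event in the same context tries to record again
 * (i.e. the tracer recursed into itself) the write is rejected, while
 * an interrupting context is still allowed to record its own event.
 * The ->nest offset lets ring_buffer_nest_start() permit one level of
 * intended recursion.
 */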
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
	unsigned int val = cpu_buffer->current_context;
	unsigned long pc = preempt_count();
	int bit;

	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
		bit = RB_CTX_NORMAL;
	else
		bit = pc & NMI_MASK ? RB_CTX_NMI :
			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;

	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
		return 1;

	val |= (1 << (bit + cpu_buffer->nest));
	cpu_buffer->current_context = val;

	return 0;
}

static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
	cpu_buffer->current_context &=
		cpu_buffer->current_context - (1 << cpu_buffer->nest);
}
2679
2680
2681#define NESTED_BITS 4
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696void ring_buffer_nest_start(struct ring_buffer *buffer)
2697{
2698 struct ring_buffer_per_cpu *cpu_buffer;
2699 int cpu;
2700
2701
2702 preempt_disable_notrace();
2703 cpu = raw_smp_processor_id();
2704 cpu_buffer = buffer->buffers[cpu];
2705
2706 cpu_buffer->nest += NESTED_BITS;
2707}
2708
2709
2710
2711
2712
2713
2714
2715
2716void ring_buffer_nest_end(struct ring_buffer *buffer)
2717{
2718 struct ring_buffer_per_cpu *cpu_buffer;
2719 int cpu;
2720
2721
2722 cpu = raw_smp_processor_id();
2723 cpu_buffer = buffer->buffers[cpu];
2724
2725 cpu_buffer->nest -= NESTED_BITS;
2726 preempt_enable_notrace();
2727}
2728
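/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */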
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
			      struct ring_buffer_event *event)
{
	struct ring_buffer_per_cpu *cpu_buffer;
	int cpu = raw_smp_processor_id();

	cpu_buffer = buffer->buffers[cpu];

	rb_commit(cpu_buffer, event);

	rb_wakeups(buffer, cpu_buffer);

	trace_recursive_unlock(cpu_buffer);

	preempt_enable_notrace();

	return 0;
}
EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2757
2758static noinline void
2759rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2760 struct rb_event_info *info)
2761{
2762 WARN_ONCE(info->delta > (1ULL << 59),
2763 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2764 (unsigned long long)info->delta,
2765 (unsigned long long)info->ts,
2766 (unsigned long long)cpu_buffer->write_stamp,
2767 sched_clock_stable() ? "" :
2768 "If you just came from a suspend/resume,\n"
2769 "please switch to the trace global clock:\n"
2770 " echo global > /sys/kernel/debug/tracing/trace_clock\n"
2771 "or add trace_clock=global to the kernel command line\n");
2772 info->add_timestamp = 1;
2773}
2774
2775static struct ring_buffer_event *
2776__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2777 struct rb_event_info *info)
2778{
2779 struct ring_buffer_event *event;
2780 struct buffer_page *tail_page;
2781 unsigned long tail, write;
2782
2783
2784
2785
2786
2787
2788 if (unlikely(info->add_timestamp))
2789 info->length += RB_LEN_TIME_EXTEND;
2790
2791
2792 tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
2793 write = local_add_return(info->length, &tail_page->write);
2794
2795
2796 write &= RB_WRITE_MASK;
2797 tail = write - info->length;
2798
2799
2800
2801
2802
2803 if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
2804 info->delta = 0;
2805
2806
2807 if (unlikely(write > BUF_PAGE_SIZE))
2808 return rb_move_tail(cpu_buffer, tail, info);
2809
2810
2811
2812 event = __rb_page_index(tail_page, tail);
2813 rb_update_event(cpu_buffer, event, info);
2814
2815 local_inc(&tail_page->entries);
2816
2817
2818
2819
2820
2821 if (!tail)
2822 tail_page->page->time_stamp = info->ts;
2823
2824
2825 local_add(info->length, &cpu_buffer->entries_bytes);
2826
2827 return event;
2828}
2829
2830static __always_inline struct ring_buffer_event *
2831rb_reserve_next_event(struct ring_buffer *buffer,
2832 struct ring_buffer_per_cpu *cpu_buffer,
2833 unsigned long length)
2834{
2835 struct ring_buffer_event *event;
2836 struct rb_event_info info;
2837 int nr_loops = 0;
2838 u64 diff;
2839
2840 rb_start_commit(cpu_buffer);
2841
2842#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2843
2844
2845
2846
2847
2848
2849 barrier();
2850 if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
2851 local_dec(&cpu_buffer->committing);
2852 local_dec(&cpu_buffer->commits);
2853 return NULL;
2854 }
2855#endif
2856
2857 info.length = rb_calculate_event_length(length);
2858 again:
2859 info.add_timestamp = 0;
2860 info.delta = 0;
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2872 goto out_fail;
2873
2874 info.ts = rb_time_stamp(cpu_buffer->buffer);
2875 diff = info.ts - cpu_buffer->write_stamp;
2876
2877
2878 barrier();
2879
2880 if (ring_buffer_time_stamp_abs(buffer)) {
2881 info.delta = info.ts;
2882 rb_handle_timestamp(cpu_buffer, &info);
2883 } else
2884 if (likely(info.ts >= cpu_buffer->write_stamp)) {
2885 info.delta = diff;
2886 if (unlikely(test_time_stamp(info.delta)))
2887 rb_handle_timestamp(cpu_buffer, &info);
2888 }
2889
2890 event = __rb_reserve_next(cpu_buffer, &info);
2891
2892 if (unlikely(PTR_ERR(event) == -EAGAIN)) {
2893 if (info.add_timestamp)
2894 info.length -= RB_LEN_TIME_EXTEND;
2895 goto again;
2896 }
2897
2898 if (!event)
2899 goto out_fail;
2900
2901 return event;
2902
2903 out_fail:
2904 rb_end_commit(cpu_buffer);
2905 return NULL;
2906}
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
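/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding the event header)
 *
 * Reserves an event on the current CPU's buffer and disables preemption.
 * The caller must fill in the returned event's data and then call
 * ring_buffer_unlock_commit() (or ring_buffer_discard_commit()) to
 * release the reservation and re-enable preemption.
 *
 * Returns the reserved event, or NULL if recording is disabled, the
 * length is too large, or the reservation cannot be made.
 */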
2923struct ring_buffer_event *
2924ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2925{
2926 struct ring_buffer_per_cpu *cpu_buffer;
2927 struct ring_buffer_event *event;
2928 int cpu;
2929
2930
2931 preempt_disable_notrace();
2932
2933 if (unlikely(atomic_read(&buffer->record_disabled)))
2934 goto out;
2935
2936 cpu = raw_smp_processor_id();
2937
2938 if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
2939 goto out;
2940
2941 cpu_buffer = buffer->buffers[cpu];
2942
2943 if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
2944 goto out;
2945
2946 if (unlikely(length > BUF_MAX_DATA_SIZE))
2947 goto out;
2948
2949 if (unlikely(trace_recursive_lock(cpu_buffer)))
2950 goto out;
2951
2952 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2953 if (!event)
2954 goto out_unlock;
2955
2956 return event;
2957
2958 out_unlock:
2959 trace_recursive_unlock(cpu_buffer);
2960 out:
2961 preempt_enable_notrace();
2962 return NULL;
2963}
2964EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
2965
2966
2967
2968
2969
2970
2971
2972static inline void
2973rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2974 struct ring_buffer_event *event)
2975{
2976 unsigned long addr = (unsigned long)event;
2977 struct buffer_page *bpage = cpu_buffer->commit_page;
2978 struct buffer_page *start;
2979
2980 addr &= PAGE_MASK;
2981
2982
2983 if (likely(bpage->page == (void *)addr)) {
2984 local_dec(&bpage->entries);
2985 return;
2986 }
2987
2988
2989
2990
2991
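 /*
  * The commit page may currently be the reader page, so start the
  * search at the next page and stop once we have looped back around.
  */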
2992 rb_inc_page(cpu_buffer, &bpage);
2993 start = bpage;
2994 do {
2995 if (bpage->page == (void *)addr) {
2996 local_dec(&bpage->entries);
2997 return;
2998 }
2999 rb_inc_page(cpu_buffer, &bpage);
3000 } while (bpage != start);
3001
3002
3003 RB_WARN_ON(cpu_buffer, 1);
3004}
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
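/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: the event reserved by ring_buffer_lock_reserve()
 *
 * Takes the place of ring_buffer_unlock_commit() for a reserved event
 * that should not be recorded: the event is turned into padding (or its
 * space reclaimed when possible) and the commit is ended. Like the
 * normal commit path, this drops the recursion lock and re-enables
 * preemption, and it must be called before the event is committed.
 */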
3025void ring_buffer_discard_commit(struct ring_buffer *buffer,
3026 struct ring_buffer_event *event)
3027{
3028 struct ring_buffer_per_cpu *cpu_buffer;
3029 int cpu;
3030
3031
3032 rb_event_discard(event);
3033
3034 cpu = smp_processor_id();
3035 cpu_buffer = buffer->buffers[cpu];
3036
3037
3038
3039
3040
3041
3042 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
3043
3044 rb_decrement_entry(cpu_buffer, event);
3045 if (rb_try_to_discard(cpu_buffer, event))
3046 goto out;
3047
3048
3049
3050
3051
3052 rb_update_write_stamp(cpu_buffer, event);
3053 out:
3054 rb_end_commit(cpu_buffer);
3055
3056 trace_recursive_unlock(cpu_buffer);
3057
3058 preempt_enable_notrace();
3059
3060}
3061EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
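/**
 * ring_buffer_write - write data to the buffer without explicit reserve
 * @buffer: the ring buffer to write to
 * @length: the length of the data to write (excluding the event header)
 * @data: the data to write
 *
 * One-shot helper that reserves an event on the current CPU's buffer,
 * copies @data into it and commits it immediately.
 *
 * Returns 0 on success, or -EBUSY if the event could not be written.
 */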
3076int ring_buffer_write(struct ring_buffer *buffer,
3077 unsigned long length,
3078 void *data)
3079{
3080 struct ring_buffer_per_cpu *cpu_buffer;
3081 struct ring_buffer_event *event;
3082 void *body;
3083 int ret = -EBUSY;
3084 int cpu;
3085
3086 preempt_disable_notrace();
3087
3088 if (atomic_read(&buffer->record_disabled))
3089 goto out;
3090
3091 cpu = raw_smp_processor_id();
3092
3093 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3094 goto out;
3095
3096 cpu_buffer = buffer->buffers[cpu];
3097
3098 if (atomic_read(&cpu_buffer->record_disabled))
3099 goto out;
3100
3101 if (length > BUF_MAX_DATA_SIZE)
3102 goto out;
3103
3104 if (unlikely(trace_recursive_lock(cpu_buffer)))
3105 goto out;
3106
3107 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3108 if (!event)
3109 goto out_unlock;
3110
3111 body = rb_event_data(event);
3112
3113 memcpy(body, data, length);
3114
3115 rb_commit(cpu_buffer, event);
3116
3117 rb_wakeups(buffer, cpu_buffer);
3118
3119 ret = 0;
3120
3121 out_unlock:
3122 trace_recursive_unlock(cpu_buffer);
3123
3124 out:
3125 preempt_enable_notrace();
3126
3127 return ret;
3128}
3129EXPORT_SYMBOL_GPL(ring_buffer_write);
3130
3131static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3132{
3133 struct buffer_page *reader = cpu_buffer->reader_page;
3134 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3135 struct buffer_page *commit = cpu_buffer->commit_page;
3136
3137
3138 if (unlikely(!head))
3139 return true;
3140
3141 return reader->read == rb_page_commit(reader) &&
3142 (commit == reader ||
3143 (commit == head &&
3144 head->read == rb_page_commit(commit)));
3145}
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156void ring_buffer_record_disable(struct ring_buffer *buffer)
3157{
3158 atomic_inc(&buffer->record_disabled);
3159}
3160EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3161
3162
3163
3164
3165
3166
3167
3168
3169void ring_buffer_record_enable(struct ring_buffer *buffer)
3170{
3171 atomic_dec(&buffer->record_disabled);
3172}
3173EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
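/**
 * ring_buffer_record_off - stop all writes into the buffer
 * @buffer: the ring buffer to stop writes to
 *
 * Unlike ring_buffer_record_disable(), this acts as an on/off switch
 * rather than a nesting counter: it sets the RB_BUFFER_OFF flag, which
 * only ring_buffer_record_on() clears again.
 */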
3186void ring_buffer_record_off(struct ring_buffer *buffer)
3187{
3188 unsigned int rd;
3189 unsigned int new_rd;
3190
3191 do {
3192 rd = atomic_read(&buffer->record_disabled);
3193 new_rd = rd | RB_BUFFER_OFF;
3194 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3195}
3196EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
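/**
 * ring_buffer_record_on - restart writes into the buffer
 * @buffer: the ring buffer to start writes to
 *
 * Clears the RB_BUFFER_OFF flag set by ring_buffer_record_off(). This
 * does not override ring_buffer_record_disable(); recording resumes
 * only when the disable counter is also zero.
 */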
3209void ring_buffer_record_on(struct ring_buffer *buffer)
3210{
3211 unsigned int rd;
3212 unsigned int new_rd;
3213
3214 do {
3215 rd = atomic_read(&buffer->record_disabled);
3216 new_rd = rd & ~RB_BUFFER_OFF;
3217 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3218}
3219EXPORT_SYMBOL_GPL(ring_buffer_record_on);
3220
3221
3222
3223
3224
3225
3226
3227bool ring_buffer_record_is_on(struct ring_buffer *buffer)
3228{
3229 return !atomic_read(&buffer->record_disabled);
3230}
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243bool ring_buffer_record_is_set_on(struct ring_buffer *buffer)
3244{
3245 return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
3246}
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
3259{
3260 struct ring_buffer_per_cpu *cpu_buffer;
3261
3262 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3263 return;
3264
3265 cpu_buffer = buffer->buffers[cpu];
3266 atomic_inc(&cpu_buffer->record_disabled);
3267}
3268EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
3279{
3280 struct ring_buffer_per_cpu *cpu_buffer;
3281
3282 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3283 return;
3284
3285 cpu_buffer = buffer->buffers[cpu];
3286 atomic_dec(&cpu_buffer->record_disabled);
3287}
3288EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
3289
3290
3291
3292
3293
3294
3295
3296static inline unsigned long
3297rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3298{
3299 return local_read(&cpu_buffer->entries) -
3300 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3301}
3302
3303
3304
3305
3306
3307
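/**
 * ring_buffer_oldest_event_ts - get the timestamp of the oldest unread page
 * @buffer: the ring buffer
 * @cpu: the per-cpu buffer to read from
 *
 * Returns the timestamp of the oldest unread page (the reader page if
 * the writer is still on it, otherwise the head page), or 0 if @cpu is
 * not part of the buffer.
 */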
3308u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
3309{
3310 unsigned long flags;
3311 struct ring_buffer_per_cpu *cpu_buffer;
3312 struct buffer_page *bpage;
3313 u64 ret = 0;
3314
3315 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3316 return 0;
3317
3318 cpu_buffer = buffer->buffers[cpu];
3319 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3320
3321
3322
3323
3324 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3325 bpage = cpu_buffer->reader_page;
3326 else
3327 bpage = rb_set_head_page(cpu_buffer);
3328 if (bpage)
3329 ret = bpage->page->time_stamp;
3330 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3331
3332 return ret;
3333}
3334EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3335
3336
3337
3338
3339
3340
3341unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
3342{
3343 struct ring_buffer_per_cpu *cpu_buffer;
3344 unsigned long ret;
3345
3346 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3347 return 0;
3348
3349 cpu_buffer = buffer->buffers[cpu];
3350 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3351
3352 return ret;
3353}
3354EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3355
3356
3357
3358
3359
3360
3361unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
3362{
3363 struct ring_buffer_per_cpu *cpu_buffer;
3364
3365 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3366 return 0;
3367
3368 cpu_buffer = buffer->buffers[cpu];
3369
3370 return rb_num_of_entries(cpu_buffer);
3371}
3372EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3373
3374
3375
3376
3377
3378
3379
3380unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
3381{
3382 struct ring_buffer_per_cpu *cpu_buffer;
3383 unsigned long ret;
3384
3385 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3386 return 0;
3387
3388 cpu_buffer = buffer->buffers[cpu];
3389 ret = local_read(&cpu_buffer->overrun);
3390
3391 return ret;
3392}
3393EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3394
3395
3396
3397
3398
3399
3400
3401
3402unsigned long
3403ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3404{
3405 struct ring_buffer_per_cpu *cpu_buffer;
3406 unsigned long ret;
3407
3408 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3409 return 0;
3410
3411 cpu_buffer = buffer->buffers[cpu];
3412 ret = local_read(&cpu_buffer->commit_overrun);
3413
3414 return ret;
3415}
3416EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3417
3418
3419
3420
3421
3422
3423
3424unsigned long
3425ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3426{
3427 struct ring_buffer_per_cpu *cpu_buffer;
3428 unsigned long ret;
3429
3430 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3431 return 0;
3432
3433 cpu_buffer = buffer->buffers[cpu];
3434 ret = local_read(&cpu_buffer->dropped_events);
3435
3436 return ret;
3437}
3438EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3439
3440
3441
3442
3443
3444
3445unsigned long
3446ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3447{
3448 struct ring_buffer_per_cpu *cpu_buffer;
3449
3450 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3451 return 0;
3452
3453 cpu_buffer = buffer->buffers[cpu];
3454 return cpu_buffer->read;
3455}
3456EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3457
3458
3459
3460
3461
3462
3463
3464
3465unsigned long ring_buffer_entries(struct ring_buffer *buffer)
3466{
3467 struct ring_buffer_per_cpu *cpu_buffer;
3468 unsigned long entries = 0;
3469 int cpu;
3470
3471
3472 for_each_buffer_cpu(buffer, cpu) {
3473 cpu_buffer = buffer->buffers[cpu];
3474 entries += rb_num_of_entries(cpu_buffer);
3475 }
3476
3477 return entries;
3478}
3479EXPORT_SYMBOL_GPL(ring_buffer_entries);
3480
3481
3482
3483
3484
3485
3486
3487
3488unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
3489{
3490 struct ring_buffer_per_cpu *cpu_buffer;
3491 unsigned long overruns = 0;
3492 int cpu;
3493
3494
3495 for_each_buffer_cpu(buffer, cpu) {
3496 cpu_buffer = buffer->buffers[cpu];
3497 overruns += local_read(&cpu_buffer->overrun);
3498 }
3499
3500 return overruns;
3501}
3502EXPORT_SYMBOL_GPL(ring_buffer_overruns);
3503
3504static void rb_iter_reset(struct ring_buffer_iter *iter)
3505{
3506 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3507
3508
3509 iter->head_page = cpu_buffer->reader_page;
3510 iter->head = cpu_buffer->reader_page->read;
3511
3512 iter->cache_reader_page = iter->head_page;
3513 iter->cache_read = cpu_buffer->read;
3514
3515 if (iter->head)
3516 iter->read_stamp = cpu_buffer->read_stamp;
3517 else
3518 iter->read_stamp = iter->head_page->page->time_stamp;
3519}
3520
3521
3522
3523
3524
3525
3526
3527
3528void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3529{
3530 struct ring_buffer_per_cpu *cpu_buffer;
3531 unsigned long flags;
3532
3533 if (!iter)
3534 return;
3535
3536 cpu_buffer = iter->cpu_buffer;
3537
3538 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3539 rb_iter_reset(iter);
3540 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3541}
3542EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3543
3544
3545
3546
3547
3548int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3549{
3550 struct ring_buffer_per_cpu *cpu_buffer;
3551 struct buffer_page *reader;
3552 struct buffer_page *head_page;
3553 struct buffer_page *commit_page;
3554 unsigned commit;
3555
3556 cpu_buffer = iter->cpu_buffer;
3557
3558
3559 reader = cpu_buffer->reader_page;
3560 head_page = cpu_buffer->head_page;
3561 commit_page = cpu_buffer->commit_page;
3562 commit = rb_page_commit(commit_page);
3563
3564 return ((iter->head_page == commit_page && iter->head == commit) ||
3565 (iter->head_page == reader && commit_page == head_page &&
3566 head_page->read == commit &&
3567 iter->head == rb_page_commit(cpu_buffer->reader_page)));
3568}
3569EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3570
3571static void
3572rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3573 struct ring_buffer_event *event)
3574{
3575 u64 delta;
3576
3577 switch (event->type_len) {
3578 case RINGBUF_TYPE_PADDING:
3579 return;
3580
3581 case RINGBUF_TYPE_TIME_EXTEND:
3582 delta = ring_buffer_event_time_stamp(event);
3583 cpu_buffer->read_stamp += delta;
3584 return;
3585
3586 case RINGBUF_TYPE_TIME_STAMP:
3587 delta = ring_buffer_event_time_stamp(event);
3588 cpu_buffer->read_stamp = delta;
3589 return;
3590
3591 case RINGBUF_TYPE_DATA:
3592 cpu_buffer->read_stamp += event->time_delta;
3593 return;
3594
3595 default:
3596 BUG();
3597 }
3598 return;
3599}
3600
3601static void
3602rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3603 struct ring_buffer_event *event)
3604{
3605 u64 delta;
3606
3607 switch (event->type_len) {
3608 case RINGBUF_TYPE_PADDING:
3609 return;
3610
3611 case RINGBUF_TYPE_TIME_EXTEND:
3612 delta = ring_buffer_event_time_stamp(event);
3613 iter->read_stamp += delta;
3614 return;
3615
3616 case RINGBUF_TYPE_TIME_STAMP:
3617 delta = ring_buffer_event_time_stamp(event);
3618 iter->read_stamp = delta;
3619 return;
3620
3621 case RINGBUF_TYPE_DATA:
3622 iter->read_stamp += event->time_delta;
3623 return;
3624
3625 default:
3626 BUG();
3627 }
3628 return;
3629}
3630
3631static struct buffer_page *
3632rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3633{
3634 struct buffer_page *reader = NULL;
3635 unsigned long overwrite;
3636 unsigned long flags;
3637 int nr_loops = 0;
3638 int ret;
3639
3640 local_irq_save(flags);
3641 arch_spin_lock(&cpu_buffer->lock);
3642
3643 again:
3644
3645
3646
3647
3648
3649
3650 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3651 reader = NULL;
3652 goto out;
3653 }
3654
3655 reader = cpu_buffer->reader_page;
3656
3657
3658 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3659 goto out;
3660
3661
3662 if (RB_WARN_ON(cpu_buffer,
3663 cpu_buffer->reader_page->read > rb_page_size(reader)))
3664 goto out;
3665
3666
3667 reader = NULL;
3668 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3669 goto out;
3670
3671
3672 if (rb_num_of_entries(cpu_buffer) == 0)
3673 goto out;
3674
3675
3676
3677
3678 local_set(&cpu_buffer->reader_page->write, 0);
3679 local_set(&cpu_buffer->reader_page->entries, 0);
3680 local_set(&cpu_buffer->reader_page->page->commit, 0);
3681 cpu_buffer->reader_page->real_end = 0;
3682
3683 spin:
3684
3685
3686
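 /* Set up the empty reader page to take the current head page's place. */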
3687 reader = rb_set_head_page(cpu_buffer);
3688 if (!reader)
3689 goto out;
3690 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3691 cpu_buffer->reader_page->list.prev = reader->list.prev;
3692
3693
3694
3695
3696
3697
3698 cpu_buffer->pages = reader->list.prev;
3699
3700
3701 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712 smp_mb();
3713 overwrite = local_read(&(cpu_buffer->overrun));
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
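 /*
  * Swap the reader page in for the head page. This only succeeds if
  * the writer is not currently moving the head; if
  * rb_head_page_replace() fails we spin and try again.
  */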
3726 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3727
3728
3729
3730
3731 if (!ret)
3732 goto spin;
3733
3734
3735
3736
3737
3738
3739 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3740 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3741
3742
3743 cpu_buffer->reader_page = reader;
3744 cpu_buffer->reader_page->read = 0;
3745
3746 if (overwrite != cpu_buffer->last_overrun) {
3747 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3748 cpu_buffer->last_overrun = overwrite;
3749 }
3750
3751 goto again;
3752
3753 out:
3754
3755 if (reader && reader->read == 0)
3756 cpu_buffer->read_stamp = reader->page->time_stamp;
3757
3758 arch_spin_unlock(&cpu_buffer->lock);
3759 local_irq_restore(flags);
3760
3761 return reader;
3762}
3763
3764static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3765{
3766 struct ring_buffer_event *event;
3767 struct buffer_page *reader;
3768 unsigned length;
3769
3770 reader = rb_get_reader_page(cpu_buffer);
3771
3772
3773 if (RB_WARN_ON(cpu_buffer, !reader))
3774 return;
3775
3776 event = rb_reader_event(cpu_buffer);
3777
3778 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3779 cpu_buffer->read++;
3780
3781 rb_update_read_stamp(cpu_buffer, event);
3782
3783 length = rb_event_length(event);
3784 cpu_buffer->reader_page->read += length;
3785}
3786
3787static void rb_advance_iter(struct ring_buffer_iter *iter)
3788{
3789 struct ring_buffer_per_cpu *cpu_buffer;
3790 struct ring_buffer_event *event;
3791 unsigned length;
3792
3793 cpu_buffer = iter->cpu_buffer;
3794
3795
3796
3797
3798 if (iter->head >= rb_page_size(iter->head_page)) {
3799
3800 if (iter->head_page == cpu_buffer->commit_page)
3801 return;
3802 rb_inc_iter(iter);
3803 return;
3804 }
3805
3806 event = rb_iter_head_event(iter);
3807
3808 length = rb_event_length(event);
3809
3810
3811
3812
3813
3814 if (RB_WARN_ON(cpu_buffer,
3815 (iter->head_page == cpu_buffer->commit_page) &&
3816 (iter->head + length > rb_commit_index(cpu_buffer))))
3817 return;
3818
3819 rb_update_iter_read_stamp(iter, event);
3820
3821 iter->head += length;
3822
3823
3824 if ((iter->head >= rb_page_size(iter->head_page)) &&
3825 (iter->head_page != cpu_buffer->commit_page))
3826 rb_inc_iter(iter);
3827}
3828
3829static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3830{
3831 return cpu_buffer->lost_events;
3832}
3833
3834static struct ring_buffer_event *
3835rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3836 unsigned long *lost_events)
3837{
3838 struct ring_buffer_event *event;
3839 struct buffer_page *reader;
3840 int nr_loops = 0;
3841
3842 if (ts)
3843 *ts = 0;
3844 again:
3845
3846
3847
3848
3849
3850
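 /*
  * We repeat when a time extend or time stamp event is encountered.
  * Since those are always attached to or followed by a data event,
  * this should not loop more than twice in a row.
  */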
3851 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3852 return NULL;
3853
3854 reader = rb_get_reader_page(cpu_buffer);
3855 if (!reader)
3856 return NULL;
3857
3858 event = rb_reader_event(cpu_buffer);
3859
3860 switch (event->type_len) {
3861 case RINGBUF_TYPE_PADDING:
3862 if (rb_null_event(event))
3863 RB_WARN_ON(cpu_buffer, 1);
3864
3865
3866
3867
3868
3869
3870
3871
3872 return event;
3873
3874 case RINGBUF_TYPE_TIME_EXTEND:
3875
3876 rb_advance_reader(cpu_buffer);
3877 goto again;
3878
3879 case RINGBUF_TYPE_TIME_STAMP:
3880 if (ts) {
3881 *ts = ring_buffer_event_time_stamp(event);
3882 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3883 cpu_buffer->cpu, ts);
3884 }
3885
3886 rb_advance_reader(cpu_buffer);
3887 goto again;
3888
3889 case RINGBUF_TYPE_DATA:
3890 if (ts && !(*ts)) {
3891 *ts = cpu_buffer->read_stamp + event->time_delta;
3892 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3893 cpu_buffer->cpu, ts);
3894 }
3895 if (lost_events)
3896 *lost_events = rb_lost_events(cpu_buffer);
3897 return event;
3898
3899 default:
3900 BUG();
3901 }
3902
3903 return NULL;
3904}
3905EXPORT_SYMBOL_GPL(ring_buffer_peek);
3906
3907static struct ring_buffer_event *
3908rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3909{
3910 struct ring_buffer *buffer;
3911 struct ring_buffer_per_cpu *cpu_buffer;
3912 struct ring_buffer_event *event;
3913 int nr_loops = 0;
3914
3915 if (ts)
3916 *ts = 0;
3917
3918 cpu_buffer = iter->cpu_buffer;
3919 buffer = cpu_buffer->buffer;
3920
3921
3922
3923
3924
3925
3926 if (unlikely(iter->cache_read != cpu_buffer->read ||
3927 iter->cache_reader_page != cpu_buffer->reader_page))
3928 rb_iter_reset(iter);
3929
3930 again:
3931 if (ring_buffer_iter_empty(iter))
3932 return NULL;
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
3943 return NULL;
3944
3945 if (rb_per_cpu_empty(cpu_buffer))
3946 return NULL;
3947
3948 if (iter->head >= rb_page_size(iter->head_page)) {
3949 rb_inc_iter(iter);
3950 goto again;
3951 }
3952
3953 event = rb_iter_head_event(iter);
3954
3955 switch (event->type_len) {
3956 case RINGBUF_TYPE_PADDING:
3957 if (rb_null_event(event)) {
3958 rb_inc_iter(iter);
3959 goto again;
3960 }
3961 rb_advance_iter(iter);
3962 return event;
3963
3964 case RINGBUF_TYPE_TIME_EXTEND:
3965
3966 rb_advance_iter(iter);
3967 goto again;
3968
3969 case RINGBUF_TYPE_TIME_STAMP:
3970 if (ts) {
3971 *ts = ring_buffer_event_time_stamp(event);
3972 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3973 cpu_buffer->cpu, ts);
3974 }
3975
3976 rb_advance_iter(iter);
3977 goto again;
3978
3979 case RINGBUF_TYPE_DATA:
3980 if (ts && !(*ts)) {
3981 *ts = iter->read_stamp + event->time_delta;
3982 ring_buffer_normalize_time_stamp(buffer,
3983 cpu_buffer->cpu, ts);
3984 }
3985 return event;
3986
3987 default:
3988 BUG();
3989 }
3990
3991 return NULL;
3992}
3993EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
3994
3995static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
3996{
3997 if (likely(!in_nmi())) {
3998 raw_spin_lock(&cpu_buffer->reader_lock);
3999 return true;
4000 }
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
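 /*
  * If an NMI (e.g. a die dump) is reading the ring buffer, a trylock
  * must be used to avoid deadlocking against a task that already holds
  * the reader lock. If the trylock fails we still allow the read, but
  * since that can corrupt the buffer, writing is permanently disabled.
  */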
4011 if (raw_spin_trylock(&cpu_buffer->reader_lock))
4012 return true;
4013
4014
4015 atomic_inc(&cpu_buffer->record_disabled);
4016 return false;
4017}
4018
4019static inline void
4020rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
4021{
4022 if (likely(locked))
4023 raw_spin_unlock(&cpu_buffer->reader_lock);
4024 return;
4025}
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
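/**
 * ring_buffer_peek - peek at the next event without consuming it
 * @buffer: the ring buffer to read
 * @cpu: the per-cpu buffer to peek at
 * @ts: if non-NULL, filled with the event's normalized timestamp
 * @lost_events: if non-NULL, filled with the number of lost events
 *
 * Returns the next event on @cpu without advancing past it, or NULL if
 * the buffer is empty. A later ring_buffer_consume() will return the
 * same event.
 */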
4037struct ring_buffer_event *
4038ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
4039 unsigned long *lost_events)
4040{
4041 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4042 struct ring_buffer_event *event;
4043 unsigned long flags;
4044 bool dolock;
4045
4046 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4047 return NULL;
4048
4049 again:
4050 local_irq_save(flags);
4051 dolock = rb_reader_lock(cpu_buffer);
4052 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4053 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4054 rb_advance_reader(cpu_buffer);
4055 rb_reader_unlock(cpu_buffer, dolock);
4056 local_irq_restore(flags);
4057
4058 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4059 goto again;
4060
4061 return event;
4062}
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072struct ring_buffer_event *
4073ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4074{
4075 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4076 struct ring_buffer_event *event;
4077 unsigned long flags;
4078
4079 again:
4080 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4081 event = rb_iter_peek(iter, ts);
4082 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4083
4084 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4085 goto again;
4086
4087 return event;
4088}
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
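/**
 * ring_buffer_consume - return the next event and consume it
 * @buffer: the ring buffer to read
 * @cpu: the per-cpu buffer to read from
 * @ts: if non-NULL, filled with the event's normalized timestamp
 * @lost_events: if non-NULL, filled with the number of lost events
 *
 * Returns the next event on @cpu and advances the reader past it, so
 * the event will not be read again. Returns NULL if the buffer is
 * empty or @cpu is not part of the buffer.
 */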
4101struct ring_buffer_event *
4102ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
4103 unsigned long *lost_events)
4104{
4105 struct ring_buffer_per_cpu *cpu_buffer;
4106 struct ring_buffer_event *event = NULL;
4107 unsigned long flags;
4108 bool dolock;
4109
4110 again:
4111
4112 preempt_disable();
4113
4114 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4115 goto out;
4116
4117 cpu_buffer = buffer->buffers[cpu];
4118 local_irq_save(flags);
4119 dolock = rb_reader_lock(cpu_buffer);
4120
4121 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4122 if (event) {
4123 cpu_buffer->lost_events = 0;
4124 rb_advance_reader(cpu_buffer);
4125 }
4126
4127 rb_reader_unlock(cpu_buffer, dolock);
4128 local_irq_restore(flags);
4129
4130 out:
4131 preempt_enable();
4132
4133 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4134 goto again;
4135
4136 return event;
4137}
4138EXPORT_SYMBOL_GPL(ring_buffer_consume);
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
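/**
 * ring_buffer_read_prepare - prepare an iterator for reading the buffer
 * @buffer: the ring buffer to read from
 * @cpu: the per-cpu buffer to iterate over
 *
 * Allocates an iterator for @cpu and disables recording on that CPU as
 * well as resizing of @buffer. After preparing all iterators, the
 * caller should call ring_buffer_read_prepare_sync() once, then
 * ring_buffer_read_start() on each iterator, and finally
 * ring_buffer_read_finish() when done.
 *
 * Returns the iterator, or NULL on failure.
 */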
4160struct ring_buffer_iter *
4161ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
4162{
4163 struct ring_buffer_per_cpu *cpu_buffer;
4164 struct ring_buffer_iter *iter;
4165
4166 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4167 return NULL;
4168
4169 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
4170 if (!iter)
4171 return NULL;
4172
4173 cpu_buffer = buffer->buffers[cpu];
4174
4175 iter->cpu_buffer = cpu_buffer;
4176
4177 atomic_inc(&buffer->resize_disabled);
4178 atomic_inc(&cpu_buffer->record_disabled);
4179
4180 return iter;
4181}
4182EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
4183
4184
4185
4186
4187
4188
4189
4190
4191void
4192ring_buffer_read_prepare_sync(void)
4193{
4194 synchronize_sched();
4195}
4196EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209void
4210ring_buffer_read_start(struct ring_buffer_iter *iter)
4211{
4212 struct ring_buffer_per_cpu *cpu_buffer;
4213 unsigned long flags;
4214
4215 if (!iter)
4216 return;
4217
4218 cpu_buffer = iter->cpu_buffer;
4219
4220 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4221 arch_spin_lock(&cpu_buffer->lock);
4222 rb_iter_reset(iter);
4223 arch_spin_unlock(&cpu_buffer->lock);
4224 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4225}
4226EXPORT_SYMBOL_GPL(ring_buffer_read_start);
4227
4228
4229
4230
4231
4232
4233
4234
4235void
4236ring_buffer_read_finish(struct ring_buffer_iter *iter)
4237{
4238 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4239 unsigned long flags;
4240
4241
4242
4243
4244
4245
4246
4247 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4248 rb_check_pages(cpu_buffer);
4249 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4250
4251 atomic_dec(&cpu_buffer->record_disabled);
4252 atomic_dec(&cpu_buffer->buffer->resize_disabled);
4253 kfree(iter);
4254}
4255EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
4256
4257
4258
4259
4260
4261
4262
4263
4264struct ring_buffer_event *
4265ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
4266{
4267 struct ring_buffer_event *event;
4268 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4269 unsigned long flags;
4270
4271 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4272 again:
4273 event = rb_iter_peek(iter, ts);
4274 if (!event)
4275 goto out;
4276
4277 if (event->type_len == RINGBUF_TYPE_PADDING)
4278 goto again;
4279
4280 rb_advance_iter(iter);
4281 out:
4282 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4283
4284 return event;
4285}
4286EXPORT_SYMBOL_GPL(ring_buffer_read);
4287
4288
4289
4290
4291
4292unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
4293{
4294
4295
4296
4297
4298
4299
4300 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4301 return 0;
4302
4303 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
4304}
4305EXPORT_SYMBOL_GPL(ring_buffer_size);
4306
4307static void
4308rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
4309{
4310 rb_head_page_deactivate(cpu_buffer);
4311
4312 cpu_buffer->head_page
4313 = list_entry(cpu_buffer->pages, struct buffer_page, list);
4314 local_set(&cpu_buffer->head_page->write, 0);
4315 local_set(&cpu_buffer->head_page->entries, 0);
4316 local_set(&cpu_buffer->head_page->page->commit, 0);
4317
4318 cpu_buffer->head_page->read = 0;
4319
4320 cpu_buffer->tail_page = cpu_buffer->head_page;
4321 cpu_buffer->commit_page = cpu_buffer->head_page;
4322
4323 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
4324 INIT_LIST_HEAD(&cpu_buffer->new_pages);
4325 local_set(&cpu_buffer->reader_page->write, 0);
4326 local_set(&cpu_buffer->reader_page->entries, 0);
4327 local_set(&cpu_buffer->reader_page->page->commit, 0);
4328 cpu_buffer->reader_page->read = 0;
4329
4330 local_set(&cpu_buffer->entries_bytes, 0);
4331 local_set(&cpu_buffer->overrun, 0);
4332 local_set(&cpu_buffer->commit_overrun, 0);
4333 local_set(&cpu_buffer->dropped_events, 0);
4334 local_set(&cpu_buffer->entries, 0);
4335 local_set(&cpu_buffer->committing, 0);
4336 local_set(&cpu_buffer->commits, 0);
4337 cpu_buffer->read = 0;
4338 cpu_buffer->read_bytes = 0;
4339
4340 cpu_buffer->write_stamp = 0;
4341 cpu_buffer->read_stamp = 0;
4342
4343 cpu_buffer->lost_events = 0;
4344 cpu_buffer->last_overrun = 0;
4345
4346 rb_head_page_activate(cpu_buffer);
4347}
4348
4349
4350
4351
4352
4353
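/**
 * ring_buffer_reset_cpu - reset (clear) a per-cpu buffer
 * @buffer: the ring buffer
 * @cpu: the per-cpu buffer to reset
 *
 * Discards all events on @cpu and resets its counters and timestamps.
 * Recording on that CPU is paused while the reset takes place.
 */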
4354void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
4355{
4356 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4357 unsigned long flags;
4358
4359 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4360 return;
4361
4362 atomic_inc(&buffer->resize_disabled);
4363 atomic_inc(&cpu_buffer->record_disabled);
4364
4365
4366 synchronize_sched();
4367
4368 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4369
4370 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
4371 goto out;
4372
4373 arch_spin_lock(&cpu_buffer->lock);
4374
4375 rb_reset_cpu(cpu_buffer);
4376
4377 arch_spin_unlock(&cpu_buffer->lock);
4378
4379 out:
4380 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4381
4382 atomic_dec(&cpu_buffer->record_disabled);
4383 atomic_dec(&buffer->resize_disabled);
4384}
4385EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
4386
4387
4388
4389
4390
4391void ring_buffer_reset(struct ring_buffer *buffer)
4392{
4393 int cpu;
4394
4395 for_each_buffer_cpu(buffer, cpu)
4396 ring_buffer_reset_cpu(buffer, cpu);
4397}
4398EXPORT_SYMBOL_GPL(ring_buffer_reset);
4399
4400
4401
4402
4403
4404bool ring_buffer_empty(struct ring_buffer *buffer)
4405{
4406 struct ring_buffer_per_cpu *cpu_buffer;
4407 unsigned long flags;
4408 bool dolock;
4409 int cpu;
4410 int ret;
4411
4412
4413 for_each_buffer_cpu(buffer, cpu) {
4414 cpu_buffer = buffer->buffers[cpu];
4415 local_irq_save(flags);
4416 dolock = rb_reader_lock(cpu_buffer);
4417 ret = rb_per_cpu_empty(cpu_buffer);
4418 rb_reader_unlock(cpu_buffer, dolock);
4419 local_irq_restore(flags);
4420
4421 if (!ret)
4422 return false;
4423 }
4424
4425 return true;
4426}
4427EXPORT_SYMBOL_GPL(ring_buffer_empty);
4428
4429
4430
4431
4432
4433
4434bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
4435{
4436 struct ring_buffer_per_cpu *cpu_buffer;
4437 unsigned long flags;
4438 bool dolock;
4439 int ret;
4440
4441 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4442 return true;
4443
4444 cpu_buffer = buffer->buffers[cpu];
4445 local_irq_save(flags);
4446 dolock = rb_reader_lock(cpu_buffer);
4447 ret = rb_per_cpu_empty(cpu_buffer);
4448 rb_reader_unlock(cpu_buffer, dolock);
4449 local_irq_restore(flags);
4450
4451 return ret;
4452}
4453EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
4454
4455#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
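/**
 * ring_buffer_swap_cpu - swap a per-cpu buffer between two ring buffers
 * @buffer_a: one ring buffer
 * @buffer_b: the other ring buffer
 * @cpu: the CPU whose buffers are swapped
 *
 * The two per-cpu buffers must have the same number of pages. Returns
 * 0 on success, -EINVAL if @cpu or the sizes do not match, -EAGAIN if
 * recording is disabled on either buffer, or -EBUSY if a commit is in
 * progress on the CPU being swapped.
 */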
4466int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4467 struct ring_buffer *buffer_b, int cpu)
4468{
4469 struct ring_buffer_per_cpu *cpu_buffer_a;
4470 struct ring_buffer_per_cpu *cpu_buffer_b;
4471 int ret = -EINVAL;
4472
4473 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4474 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4475 goto out;
4476
4477 cpu_buffer_a = buffer_a->buffers[cpu];
4478 cpu_buffer_b = buffer_b->buffers[cpu];
4479
4480
4481 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4482 goto out;
4483
4484 ret = -EAGAIN;
4485
4486 if (atomic_read(&buffer_a->record_disabled))
4487 goto out;
4488
4489 if (atomic_read(&buffer_b->record_disabled))
4490 goto out;
4491
4492 if (atomic_read(&cpu_buffer_a->record_disabled))
4493 goto out;
4494
4495 if (atomic_read(&cpu_buffer_b->record_disabled))
4496 goto out;
4497
4498
4499
4500
4501
4502
4503
4504 atomic_inc(&cpu_buffer_a->record_disabled);
4505 atomic_inc(&cpu_buffer_b->record_disabled);
4506
4507 ret = -EBUSY;
4508 if (local_read(&cpu_buffer_a->committing))
4509 goto out_dec;
4510 if (local_read(&cpu_buffer_b->committing))
4511 goto out_dec;
4512
4513 buffer_a->buffers[cpu] = cpu_buffer_b;
4514 buffer_b->buffers[cpu] = cpu_buffer_a;
4515
4516 cpu_buffer_b->buffer = buffer_a;
4517 cpu_buffer_a->buffer = buffer_b;
4518
4519 ret = 0;
4520
4521out_dec:
4522 atomic_dec(&cpu_buffer_a->record_disabled);
4523 atomic_dec(&cpu_buffer_b->record_disabled);
4524out:
4525 return ret;
4526}
4527EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
4528#endif
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
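/**
 * ring_buffer_alloc_read_page - allocate a page for ring_buffer_read_page()
 * @buffer: the buffer the page will be used with
 * @cpu: the CPU the page will be used on
 *
 * Reuses the per-cpu cached free page if one is available, otherwise
 * allocates a new page on @cpu's node. Release it again with
 * ring_buffer_free_read_page().
 *
 * Returns the address of the page, or an ERR_PTR() on failure.
 */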
4546void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
4547{
4548 struct ring_buffer_per_cpu *cpu_buffer;
4549 struct buffer_data_page *bpage = NULL;
4550 unsigned long flags;
4551 struct page *page;
4552
4553 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4554 return ERR_PTR(-ENODEV);
4555
4556 cpu_buffer = buffer->buffers[cpu];
4557 local_irq_save(flags);
4558 arch_spin_lock(&cpu_buffer->lock);
4559
4560 if (cpu_buffer->free_page) {
4561 bpage = cpu_buffer->free_page;
4562 cpu_buffer->free_page = NULL;
4563 }
4564
4565 arch_spin_unlock(&cpu_buffer->lock);
4566 local_irq_restore(flags);
4567
4568 if (bpage)
4569 goto out;
4570
4571 page = alloc_pages_node(cpu_to_node(cpu),
4572 GFP_KERNEL | __GFP_NORETRY, 0);
4573 if (!page)
4574 return ERR_PTR(-ENOMEM);
4575
4576 bpage = page_address(page);
4577
4578 out:
4579 rb_init_page(bpage);
4580
4581 return bpage;
4582}
4583EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4594{
4595 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4596 struct buffer_data_page *bpage = data;
4597 struct page *page = virt_to_page(bpage);
4598 unsigned long flags;
4599
4600
4601 if (page_ref_count(page) > 1)
4602 goto out;
4603
4604 local_irq_save(flags);
4605 arch_spin_lock(&cpu_buffer->lock);
4606
4607 if (!cpu_buffer->free_page) {
4608 cpu_buffer->free_page = bpage;
4609 bpage = NULL;
4610 }
4611
4612 arch_spin_unlock(&cpu_buffer->lock);
4613 local_irq_restore(flags);
4614
4615 out:
4616 free_page((unsigned long)bpage);
4617}
4618EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
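/**
 * ring_buffer_read_page - extract a page of data from the buffer
 * @buffer: the ring buffer to read from
 * @data_page: address of the page allocated by ring_buffer_alloc_read_page();
 *             may be replaced by a swapped-out buffer page
 * @len: the amount of data to read, including the page header
 * @cpu: the per-cpu buffer to read from
 * @full: if non-zero, only succeed when a full page can be swapped out
 *
 * If the reader page has not been partially consumed and the writer is
 * no longer on it, the whole page is swapped with @data_page; otherwise
 * the available events are copied into @data_page. Lost-event
 * information is flagged in the returned page's commit field.
 *
 * Returns the offset into @data_page where the data starts (>= 0) on
 * success, or a negative value if no data could be read.
 */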
4653int ring_buffer_read_page(struct ring_buffer *buffer,
4654 void **data_page, size_t len, int cpu, int full)
4655{
4656 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4657 struct ring_buffer_event *event;
4658 struct buffer_data_page *bpage;
4659 struct buffer_page *reader;
4660 unsigned long missed_events;
4661 unsigned long flags;
4662 unsigned int commit;
4663 unsigned int read;
4664 u64 save_timestamp;
4665 int ret = -1;
4666
4667 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4668 goto out;
4669
4670
4671
4672
4673
4674 if (len <= BUF_PAGE_HDR_SIZE)
4675 goto out;
4676
4677 len -= BUF_PAGE_HDR_SIZE;
4678
4679 if (!data_page)
4680 goto out;
4681
4682 bpage = *data_page;
4683 if (!bpage)
4684 goto out;
4685
4686 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4687
4688 reader = rb_get_reader_page(cpu_buffer);
4689 if (!reader)
4690 goto out_unlock;
4691
4692 event = rb_reader_event(cpu_buffer);
4693
4694 read = reader->read;
4695 commit = rb_page_commit(reader);
4696
4697
4698 missed_events = cpu_buffer->lost_events;
4699
4700
4701
4702
4703
4704
4705
4706
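 /*
  * If this page has been partially read, if @len cannot hold the rest
  * of the page, or if the writer is still on this page, the events
  * must be copied into the caller's page. Otherwise the whole page can
  * simply be swapped out.
  */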
4707 if (read || (len < (commit - read)) ||
4708 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4709 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4710 unsigned int rpos = read;
4711 unsigned int pos = 0;
4712 unsigned int size;
4713
4714 if (full)
4715 goto out_unlock;
4716
4717 if (len > (commit - read))
4718 len = (commit - read);
4719
4720
4721 size = rb_event_ts_length(event);
4722
4723 if (len < size)
4724 goto out_unlock;
4725
4726
4727 save_timestamp = cpu_buffer->read_stamp;
4728
4729
4730 do {
4731
4732
4733
4734
4735
4736
4737 size = rb_event_length(event);
4738 memcpy(bpage->data + pos, rpage->data + rpos, size);
4739
4740 len -= size;
4741
4742 rb_advance_reader(cpu_buffer);
4743 rpos = reader->read;
4744 pos += size;
4745
4746 if (rpos >= commit)
4747 break;
4748
4749 event = rb_reader_event(cpu_buffer);
4750
4751 size = rb_event_ts_length(event);
4752 } while (len >= size);
4753
4754
4755 local_set(&bpage->commit, pos);
4756 bpage->time_stamp = save_timestamp;
4757
4758
4759 read = 0;
4760 } else {
4761
4762 cpu_buffer->read += rb_page_entries(reader);
4763 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4764
4765
4766 rb_init_page(bpage);
4767 bpage = reader->page;
4768 reader->page = *data_page;
4769 local_set(&reader->write, 0);
4770 local_set(&reader->entries, 0);
4771 reader->read = 0;
4772 *data_page = bpage;
4773
4774
4775
4776
4777
4778
4779 if (reader->real_end)
4780 local_set(&bpage->commit, reader->real_end);
4781 }
4782 ret = read;
4783
4784 cpu_buffer->lost_events = 0;
4785
4786 commit = local_read(&bpage->commit);
4787
4788
4789
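 /*
  * If events were lost, flag it in the commit field and, when there is
  * room after the data, also store the count at the end of the page.
  */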
4790 if (missed_events) {
4791
4792
4793
4794 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4795 memcpy(&bpage->data[commit], &missed_events,
4796 sizeof(missed_events));
4797 local_add(RB_MISSED_STORED, &bpage->commit);
4798 commit += sizeof(missed_events);
4799 }
4800 local_add(RB_MISSED_EVENTS, &bpage->commit);
4801 }
4802
4803
4804
4805
4806 if (commit < BUF_PAGE_SIZE)
4807 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4808
4809 out_unlock:
4810 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4811
4812 out:
4813 return ret;
4814}
4815EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4816
4817
4818
4819
4820
4821
4822int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
4823{
4824 struct ring_buffer *buffer;
4825 long nr_pages_same;
4826 int cpu_i;
4827 unsigned long nr_pages;
4828
4829 buffer = container_of(node, struct ring_buffer, node);
4830 if (cpumask_test_cpu(cpu, buffer->cpumask))
4831 return 0;
4832
4833 nr_pages = 0;
4834 nr_pages_same = 1;
4835
4836 for_each_buffer_cpu(buffer, cpu_i) {
4837
4838 if (nr_pages == 0)
4839 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4840 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4841 nr_pages_same = 0;
4842 break;
4843 }
4844 }
4845
4846 if (!nr_pages_same)
4847 nr_pages = 2;
4848 buffer->buffers[cpu] =
4849 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4850 if (!buffer->buffers[cpu]) {
4851 WARN(1, "failed to allocate ring buffer on CPU %u\n",
4852 cpu);
4853 return -ENOMEM;
4854 }
4855 smp_wmb();
4856 cpumask_set_cpu(cpu, buffer->cpumask);
4857 return 0;
4858}
4859
4860#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876static struct task_struct *rb_threads[NR_CPUS] __initdata;
4877
4878struct rb_test_data {
4879 struct ring_buffer *buffer;
4880 unsigned long events;
4881 unsigned long bytes_written;
4882 unsigned long bytes_alloc;
4883 unsigned long bytes_dropped;
4884 unsigned long events_nested;
4885 unsigned long bytes_written_nested;
4886 unsigned long bytes_alloc_nested;
4887 unsigned long bytes_dropped_nested;
4888 int min_size_nested;
4889 int max_size_nested;
4890 int max_size;
4891 int min_size;
4892 int cpu;
4893 int cnt;
4894};
4895
4896static struct rb_test_data rb_data[NR_CPUS] __initdata;
4897
4898
4899#define RB_TEST_BUFFER_SIZE 1048576
4900
4901static char rb_string[] __initdata =
4902 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
4903 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
4904 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
4905
4906static bool rb_test_started __initdata;
4907
4908struct rb_item {
4909 int size;
4910 char str[];
4911};
4912
4913static __init int rb_write_something(struct rb_test_data *data, bool nested)
4914{
4915 struct ring_buffer_event *event;
4916 struct rb_item *item;
4917 bool started;
4918 int event_len;
4919 int size;
4920 int len;
4921 int cnt;
4922
4923
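 /* Have nested writes produce different lengths than normal writes */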
4924 cnt = data->cnt + (nested ? 27 : 0);
4925
4926
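 /* Multiply cnt by ~e (68/25) to get a varied, roughly unique size */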
4927 size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
4928
4929 len = size + sizeof(struct rb_item);
4930
4931 started = rb_test_started;
4932
4933 smp_rmb();
4934
4935 event = ring_buffer_lock_reserve(data->buffer, len);
4936 if (!event) {
4937
4938 if (started) {
4939 if (nested)
4940 data->bytes_dropped_nested += len;
4941 else
4942 data->bytes_dropped += len;
4943 }
4944 return len;
4945 }
4946
4947 event_len = ring_buffer_event_length(event);
4948
4949 if (RB_WARN_ON(data->buffer, event_len < len))
4950 goto out;
4951
4952 item = ring_buffer_event_data(event);
4953 item->size = size;
4954 memcpy(item->str, rb_string, size);
4955
4956 if (nested) {
4957 data->bytes_alloc_nested += event_len;
4958 data->bytes_written_nested += len;
4959 data->events_nested++;
4960 if (!data->min_size_nested || len < data->min_size_nested)
4961 data->min_size_nested = len;
4962 if (len > data->max_size_nested)
4963 data->max_size_nested = len;
4964 } else {
4965 data->bytes_alloc += event_len;
4966 data->bytes_written += len;
4967 data->events++;
4968 if (!data->min_size || len < data->min_size)
4969 data->min_size = len;
4970 if (len > data->max_size)
4971 data->max_size = len;
4972 }
4973
4974 out:
4975 ring_buffer_unlock_commit(data->buffer, event);
4976
4977 return 0;
4978}
4979
4980static __init int rb_test(void *arg)
4981{
4982 struct rb_test_data *data = arg;
4983
4984 while (!kthread_should_stop()) {
4985 rb_write_something(data, false);
4986 data->cnt++;
4987
4988 set_current_state(TASK_INTERRUPTIBLE);
4989
4990 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
4991 }
4992
4993 return 0;
4994}
4995
4996static __init void rb_ipi(void *ignore)
4997{
4998 struct rb_test_data *data;
4999 int cpu = smp_processor_id();
5000
5001 data = &rb_data[cpu];
5002 rb_write_something(data, true);
5003}
5004
5005static __init int rb_hammer_test(void *arg)
5006{
5007 while (!kthread_should_stop()) {
5008
5009
5010 smp_call_function(rb_ipi, NULL, 1);
5011
5012 schedule();
5013 }
5014
5015 return 0;
5016}
5017
5018static __init int test_ringbuffer(void)
5019{
5020 struct task_struct *rb_hammer;
5021 struct ring_buffer *buffer;
5022 int cpu;
5023 int ret = 0;
5024
5025 pr_info("Running ring buffer tests...\n");
5026
5027 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
5028 if (WARN_ON(!buffer))
5029 return 0;
5030
5031
5032 ring_buffer_record_off(buffer);
5033
5034 for_each_online_cpu(cpu) {
5035 rb_data[cpu].buffer = buffer;
5036 rb_data[cpu].cpu = cpu;
5037 rb_data[cpu].cnt = cpu;
5038 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
5039 "rbtester/%d", cpu);
5040 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
5041 pr_cont("FAILED\n");
5042 ret = PTR_ERR(rb_threads[cpu]);
5043 goto out_free;
5044 }
5045
5046 kthread_bind(rb_threads[cpu], cpu);
5047 wake_up_process(rb_threads[cpu]);
5048 }
5049
5050
5051 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
5052 if (WARN_ON(IS_ERR(rb_hammer))) {
5053 pr_cont("FAILED\n");
5054 ret = PTR_ERR(rb_hammer);
5055 goto out_free;
5056 }
5057
5058 ring_buffer_record_on(buffer);
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068 smp_wmb();
5069 rb_test_started = true;
5070
5071 set_current_state(TASK_INTERRUPTIBLE);
5072 /* Run the test load for about 10 seconds */
5073 schedule_timeout(10 * HZ);
5074
5075 kthread_stop(rb_hammer);
5076
5077 out_free:
5078 for_each_online_cpu(cpu) {
5079 if (!rb_threads[cpu])
5080 break;
5081 kthread_stop(rb_threads[cpu]);
5082 }
5083 if (ret) {
5084 ring_buffer_free(buffer);
5085 return ret;
5086 }
5087
5088
5089 pr_info("finished\n");
5090 for_each_online_cpu(cpu) {
5091 struct ring_buffer_event *event;
5092 struct rb_test_data *data = &rb_data[cpu];
5093 struct rb_item *item;
5094 unsigned long total_events;
5095 unsigned long total_dropped;
5096 unsigned long total_written;
5097 unsigned long total_alloc;
5098 unsigned long total_read = 0;
5099 unsigned long total_size = 0;
5100 unsigned long total_len = 0;
5101 unsigned long total_lost = 0;
5102 unsigned long lost;
5103 int big_event_size;
5104 int small_event_size;
5105
5106 ret = -1;
5107
5108 total_events = data->events + data->events_nested;
5109 total_written = data->bytes_written + data->bytes_written_nested;
5110 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
5111 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
5112
5113 big_event_size = data->max_size + data->max_size_nested;
5114 small_event_size = data->min_size + data->min_size_nested;
5115
5116 pr_info("CPU %d:\n", cpu);
5117 pr_info(" events: %ld\n", total_events);
5118 pr_info(" dropped bytes: %ld\n", total_dropped);
5119 pr_info(" alloced bytes: %ld\n", total_alloc);
5120 pr_info(" written bytes: %ld\n", total_written);
5121 pr_info(" biggest event: %d\n", big_event_size);
5122 pr_info(" smallest event: %d\n", small_event_size);
5123
5124 if (RB_WARN_ON(buffer, total_dropped))
5125 break;
5126
5127 ret = 0;
5128
5129 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
5130 total_lost += lost;
5131 item = ring_buffer_event_data(event);
5132 total_len += ring_buffer_event_length(event);
5133 total_size += item->size + sizeof(struct rb_item);
5134 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
5135 pr_info("FAILED!\n");
5136 pr_info("buffer had: %.*s\n", item->size, item->str);
5137 pr_info("expected: %.*s\n", item->size, rb_string);
5138 RB_WARN_ON(buffer, 1);
5139 ret = -1;
5140 break;
5141 }
5142 total_read++;
5143 }
5144 if (ret)
5145 break;
5146
5147 ret = -1;
5148
5149 pr_info(" read events: %ld\n", total_read);
5150 pr_info(" lost events: %ld\n", total_lost);
5151 pr_info(" total events: %ld\n", total_lost + total_read);
5152 pr_info(" recorded len bytes: %ld\n", total_len);
5153 pr_info(" recorded size bytes: %ld\n", total_size);
5154 if (total_lost)
5155 pr_info(" With dropped events, record len and size may not match\n"
5156 " alloced and written from above\n");
5157 if (!total_lost) {
5158 if (RB_WARN_ON(buffer, total_len != total_alloc ||
5159 total_size != total_written))
5160 break;
5161 }
5162 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
5163 break;
5164
5165 ret = 0;
5166 }
5167 if (!ret)
5168 pr_info("Ring buffer PASSED!\n");
5169
5170 ring_buffer_free(buffer);
5171 return 0;
5172}
5173
5174late_initcall(test_ringbuffer);
5175#endif
5176