// SPDX-License-Identifier: GPL-2.0
/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/sched/clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/oom.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);

/*
 * The ring buffer header is special. We must manually up keep it.
 */
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len : 5 bits\n");
	trace_seq_puts(s, "\ttime_delta : 27 bits\n");
	trace_seq_puts(s, "\tarray : 32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
			 RINGBUF_TYPE_TIME_STAMP);
	trace_seq_printf(s, "\tdata max type_len == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}

/* Used for individual buffers (after the counter) */
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 8,
};
149
150#define skip_time_extend(event) \
151 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
152
153#define extended_time(event) \
154 (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
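
/*
 * Note on the two helpers above: a time extend is an 8-byte event placed
 * immediately before the event it extends, so skipping it is a fixed
 * 8-byte bump.  extended_time() matches both TIME_EXTEND and TIME_STAMP
 * events, since TIME_STAMP is the only type value greater than TIME_EXTEND.
 */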
155
156static inline int rb_null_event(struct ring_buffer_event *event)
157{
158 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
159}
160
161static void rb_event_set_padding(struct ring_buffer_event *event)
162{
	/* padding has a NULL time_delta */
164 event->type_len = RINGBUF_TYPE_PADDING;
165 event->time_delta = 0;
166}
167
168static unsigned
169rb_event_data_length(struct ring_buffer_event *event)
170{
171 unsigned length;
172
173 if (event->type_len)
174 length = event->type_len * RB_ALIGNMENT;
175 else
176 length = event->array[0];
177 return length + RB_EVNT_HDR_SIZE;
178}
179

/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
185static inline unsigned
186rb_event_length(struct ring_buffer_event *event)
187{
188 switch (event->type_len) {
189 case RINGBUF_TYPE_PADDING:
190 if (rb_null_event(event))
			/* undefined */
192 return -1;
193 return event->array[0] + RB_EVNT_HDR_SIZE;
194
195 case RINGBUF_TYPE_TIME_EXTEND:
196 return RB_LEN_TIME_EXTEND;
197
198 case RINGBUF_TYPE_TIME_STAMP:
199 return RB_LEN_TIME_STAMP;
200
201 case RINGBUF_TYPE_DATA:
202 return rb_event_data_length(event);
203 default:
204 BUG();
205 }
206
207 return 0;
208}
209

/*
 * Return total length of time extend and data,
 * or just the event length for all other events.
 */
214static inline unsigned
215rb_event_ts_length(struct ring_buffer_event *event)
216{
217 unsigned len = 0;
218
219 if (extended_time(event)) {
		/* time extends include the data event after it */
221 len = RB_LEN_TIME_EXTEND;
222 event = skip_time_extend(event);
223 }
224 return len + rb_event_length(event);
225}
226
/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
237unsigned ring_buffer_event_length(struct ring_buffer_event *event)
238{
239 unsigned length;
240
241 if (extended_time(event))
242 event = skip_time_extend(event);
243
244 length = rb_event_length(event);
245 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
246 return length;
247 length -= RB_EVNT_HDR_SIZE;
248 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
249 length -= sizeof(event->array[0]);
250 return length;
251}
252EXPORT_SYMBOL_GPL(ring_buffer_event_length);
253
254
255static __always_inline void *
256rb_event_data(struct ring_buffer_event *event)
257{
258 if (extended_time(event))
259 event = skip_time_extend(event);
260 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
261
262 if (event->type_len)
263 return (void *)&event->array[0];
264
265 return (void *)&event->array[1];
266}
267
268
269
270
271
272void *ring_buffer_event_data(struct ring_buffer_event *event)
273{
274 return rb_event_data(event);
275}
276EXPORT_SYMBOL_GPL(ring_buffer_event_data);
277
278#define for_each_buffer_cpu(buffer, cpu) \
279 for_each_cpu(cpu, buffer->cpumask)
280
281#define TS_SHIFT 27
282#define TS_MASK ((1ULL << TS_SHIFT) - 1)
283#define TS_DELTA_TEST (~TS_MASK)
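
/*
 * Event time deltas are stored in 27 bits (TS_SHIFT).  A delta that does
 * not fit (TS_DELTA_TEST catches any higher bits) has to be recorded with
 * a separate time-extend event.
 */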
284

/**
 * ring_buffer_event_time_stamp - return the event's extended timestamp
 * @event: the event to get the timestamp of
 *
 * Returns the extended timestamp associated with a data event.
 * An extended time_stamp is a 64-bit timestamp represented
 * internally in a special way that makes the best use of space
 * contained within a ring buffer event.  This function decodes
 * it and maps it to a straight u64 value.
 */
295u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
296{
297 u64 ts;
298
299 ts = event->array[0];
300 ts <<= TS_SHIFT;
301 ts += event->time_delta;
302
303 return ts;
304}
305
/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED	(1 << 30)

#define RB_MISSED_FLAGS		(RB_MISSED_EVENTS|RB_MISSED_STORED)
312
struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* write committed index */
	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
};
318
/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};
335

/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)
350
351static void rb_init_page(struct buffer_data_page *bpage)
352{
353 local_set(&bpage->commit, 0);
354}

/**
 * ring_buffer_page_len - the size of data on the page.
 * @page: The page to read
 *
 * Returns the amount of data on the page, including buffer page header.
 */
362size_t ring_buffer_page_len(void *page)
363{
364 struct buffer_data_page *bpage = page;
365
366 return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
367 + BUF_PAGE_HDR_SIZE;
368}

/*
 * Free the given buffer page and the data page it points at.
 */
374static void free_buffer_page(struct buffer_page *bpage)
375{
376 free_page((unsigned long)bpage->page);
377 kfree(bpage);
378}
379
/*
 * We need to fit the time_stamp delta into 27 bits.
 */
383static inline int test_time_stamp(u64 delta)
384{
385 if (delta & TS_DELTA_TEST)
386 return 1;
387 return 0;
388}
389
390#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
393#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
394
395int ring_buffer_print_page_header(struct trace_seq *s)
396{
397 struct buffer_data_page field;
398
399 trace_seq_printf(s, "\tfield: u64 timestamp;\t"
400 "offset:0;\tsize:%u;\tsigned:%u;\n",
401 (unsigned int)sizeof(field.time_stamp),
402 (unsigned int)is_signed_type(u64));
403
404 trace_seq_printf(s, "\tfield: local_t commit;\t"
405 "offset:%u;\tsize:%u;\tsigned:%u;\n",
406 (unsigned int)offsetof(typeof(field), commit),
407 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long));
409
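	/*
	 * The "overwrite" flag is kept in the high bits of the commit
	 * field, so it is reported at the same offset as commit.
	 */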
410 trace_seq_printf(s, "\tfield: int overwrite;\t"
411 "offset:%u;\tsize:%u;\tsigned:%u;\n",
412 (unsigned int)offsetof(typeof(field), commit),
413 1,
414 (unsigned int)is_signed_type(long));
415
416 trace_seq_printf(s, "\tfield: char data;\t"
417 "offset:%u;\tsize:%u;\tsigned:%u;\n",
418 (unsigned int)offsetof(typeof(field), data),
419 (unsigned int)BUF_PAGE_SIZE,
420 (unsigned int)is_signed_type(char));
421
422 return !trace_seq_has_overflowed(s);
423}
424
425struct rb_irq_work {
426 struct irq_work work;
427 wait_queue_head_t waiters;
428 wait_queue_head_t full_waiters;
429 bool waiters_pending;
430 bool full_waiters_pending;
431 bool wakeup_full;
432};

/*
 * Structure to hold event state and handle nested events.
 */
437struct rb_event_info {
438 u64 ts;
439 u64 delta;
440 unsigned long length;
441 struct buffer_page *tail_page;
442 int add_timestamp;
443};
444
/*
 * Used for which event context the event is in.
 *  NMI     = 0
 *  IRQ     = 1
 *  SOFTIRQ = 2
 *  NORMAL  = 3
 *
 * See trace_recursive_lock() comment below for more details.
 */
454enum {
455 RB_CTX_NMI,
456 RB_CTX_IRQ,
457 RB_CTX_SOFTIRQ,
458 RB_CTX_NORMAL,
459 RB_CTX_MAX
460};
461
/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
465struct ring_buffer_per_cpu {
466 int cpu;
467 atomic_t record_disabled;
468 struct ring_buffer *buffer;
469 raw_spinlock_t reader_lock;
470 arch_spinlock_t lock;
471 struct lock_class_key lock_key;
472 struct buffer_data_page *free_page;
473 unsigned long nr_pages;
474 unsigned int current_context;
475 struct list_head *pages;
476 struct buffer_page *head_page;
477 struct buffer_page *tail_page;
478 struct buffer_page *commit_page;
479 struct buffer_page *reader_page;
480 unsigned long lost_events;
481 unsigned long last_overrun;
482 unsigned long nest;
483 local_t entries_bytes;
484 local_t entries;
485 local_t overrun;
486 local_t commit_overrun;
487 local_t dropped_events;
488 local_t committing;
489 local_t commits;
490 local_t pages_touched;
491 local_t pages_read;
492 long last_pages_touch;
493 size_t shortest_full;
494 unsigned long read;
495 unsigned long read_bytes;
496 u64 write_stamp;
497 u64 read_stamp;
	/* ring buffer pages to update, > 0 to add, < 0 to remove */
499 long nr_pages_to_update;
500 struct list_head new_pages;
501 struct work_struct update_pages_work;
502 struct completion update_done;
503
504 struct rb_irq_work irq_work;
505};
506
507struct ring_buffer {
508 unsigned flags;
509 int cpus;
510 atomic_t record_disabled;
511 atomic_t resize_disabled;
512 cpumask_var_t cpumask;
513
514 struct lock_class_key *reader_lock_key;
515
516 struct mutex mutex;
517
518 struct ring_buffer_per_cpu **buffers;
519
520 struct hlist_node node;
521 u64 (*clock)(void);
522
523 struct rb_irq_work irq_work;
524 bool time_stamp_abs;
525};
526
527struct ring_buffer_iter {
528 struct ring_buffer_per_cpu *cpu_buffer;
529 unsigned long head;
530 struct buffer_page *head_page;
531 struct buffer_page *cache_reader_page;
532 unsigned long cache_read;
533 u64 read_stamp;
534};
535
/**
 * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages used by a per_cpu buffer of the ring buffer.
 */
543size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu)
544{
545 return buffer->buffers[cpu]->nr_pages;
546}
547
/**
 * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages that have content in the ring buffer.
 */
555size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu)
556{
557 size_t read;
558 size_t cnt;
559
560 read = local_read(&buffer->buffers[cpu]->pages_read);
561 cnt = local_read(&buffer->buffers[cpu]->pages_touched);
562
563 if (cnt < read) {
564 WARN_ON_ONCE(read > cnt + 1);
565 return 0;
566 }
567
568 return cnt - read;
569}
570
/*
 * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
 *
 * Schedules a delayed work to wake up any task that is blocked on the
 * ring buffer waiters queue.
 */
577static void rb_wake_up_waiters(struct irq_work *work)
578{
579 struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
580
581 wake_up_all(&rbwork->waiters);
582 if (rbwork->wakeup_full) {
583 rbwork->wakeup_full = false;
584 wake_up_all(&rbwork->full_waiters);
585 }
586}
587
/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until the given percentage of pages are dirty, if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */
598int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
599{
600 struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
601 DEFINE_WAIT(wait);
602 struct rb_irq_work *work;
603 int ret = 0;
604
	/*
	 * Depending on what the caller is waiting for, either any
	 * data in any cpu buffer, or a specific buffer, put the
	 * caller on the appropriate wait queue.
	 */
610 if (cpu == RING_BUFFER_ALL_CPUS) {
611 work = &buffer->irq_work;
		/* Full only makes sense on per cpu reads */
613 full = 0;
614 } else {
615 if (!cpumask_test_cpu(cpu, buffer->cpumask))
616 return -ENODEV;
617 cpu_buffer = buffer->buffers[cpu];
618 work = &cpu_buffer->irq_work;
619 }
620
621
622 while (true) {
623 if (full)
624 prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
625 else
626 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
627
		/*
		 * The waiters_pending / full_waiters_pending flag is set
		 * after prepare_to_wait() and before the buffer is checked
		 * for data.  A writer that adds data after our check will
		 * see the flag and queue the irq_work wakeup, so we cannot
		 * miss a wakeup: either we see the new data and break out,
		 * or the writer sees the flag and wakes us.  The flag is
		 * never cleared here; a spurious extra wakeup is harmless.
		 */
648 if (full)
649 work->full_waiters_pending = true;
650 else
651 work->waiters_pending = true;
652
653 if (signal_pending(current)) {
654 ret = -EINTR;
655 break;
656 }
657
658 if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
659 break;
660
661 if (cpu != RING_BUFFER_ALL_CPUS &&
662 !ring_buffer_empty_cpu(buffer, cpu)) {
663 unsigned long flags;
664 bool pagebusy;
665 size_t nr_pages;
666 size_t dirty;
667
668 if (!full)
669 break;
670
671 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
672 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
673 nr_pages = cpu_buffer->nr_pages;
674 dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
675 if (!cpu_buffer->shortest_full ||
676 cpu_buffer->shortest_full < full)
677 cpu_buffer->shortest_full = full;
678 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
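			/*
			 * @full is a percentage: only break out once at
			 * least that percentage of the pages are dirty.
			 */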
679 if (!pagebusy &&
680 (!nr_pages || (dirty * 100) > full * nr_pages))
681 break;
682 }
683
684 schedule();
685 }
686
687 if (full)
688 finish_wait(&work->full_waiters, &wait);
689 else
690 finish_wait(&work->waiters, &wait);
691
692 return ret;
693}

/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */
709__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
710 struct file *filp, poll_table *poll_table)
711{
712 struct ring_buffer_per_cpu *cpu_buffer;
713 struct rb_irq_work *work;
714
715 if (cpu == RING_BUFFER_ALL_CPUS)
716 work = &buffer->irq_work;
717 else {
718 if (!cpumask_test_cpu(cpu, buffer->cpumask))
719 return -EINVAL;
720
721 cpu_buffer = buffer->buffers[cpu];
722 work = &cpu_buffer->irq_work;
723 }
724
725 poll_wait(filp, &work->waiters, poll_table);
726 work->waiters_pending = true;
727
	/*
	 * Order setting waiters_pending before checking for data: either
	 * we see the data that a writer just added, or the writer sees
	 * waiters_pending and queues the irq_work wakeup.  There is no
	 * matching barrier on the writer side (it would hurt the fast
	 * path), so a tiny window remains, but it only matters when the
	 * buffer transitions from empty and is considered harmless.
	 */
740 smp_mb();
741
742 if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
743 (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
744 return EPOLLIN | EPOLLRDNORM;
745 return 0;
746}

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
749#define RB_WARN_ON(b, cond) \
750 ({ \
751 int _____ret = unlikely(cond); \
752 if (_____ret) { \
753 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
754 struct ring_buffer_per_cpu *__b = \
755 (void *)b; \
756 atomic_inc(&__b->buffer->record_disabled); \
757 } else \
758 atomic_inc(&b->record_disabled); \
759 WARN_ON(1); \
760 } \
761 _____ret; \
762 })

/* Up this if you want to test the TIME_EXTENTS and normalization */
765#define DEBUG_SHIFT 0
766
767static inline u64 rb_time_stamp(struct ring_buffer *buffer)
768{
	/* shift to debug/test normalization and TIME_EXTENTS */
770 return buffer->clock() << DEBUG_SHIFT;
771}
772
773u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
774{
775 u64 time;
776
777 preempt_disable_notrace();
778 time = rb_time_stamp(buffer);
779 preempt_enable_no_resched_notrace();
780
781 return time;
782}
783EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
784
785void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
786 int cpu, u64 *ts)
787{
788
789 *ts >>= DEBUG_SHIFT;
790}
791EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);

/*
 * Making the ring buffer lockless makes things tricky.  Writes only
 * happen on the CPU that owns the buffer, but reads can happen from
 * any CPU, so the reader and the writer synchronize through the page
 * list itself rather than through locks.
 *
 * The reader page is always off the ring buffer.  When the reader is
 * done with it, it swaps its page with the current head page of the
 * ring using a cmpxchg on the list pointers.  To make that swap safe
 * against a writer that is concurrently moving the head (when the
 * buffer is in overwrite mode), the bottom two bits of the pointer
 * that references the head page (the previous page's next pointer)
 * are used as flags:
 *
 *   RB_PAGE_HEAD   - this pointer points to the current head page
 *   RB_PAGE_UPDATE - a writer is in the process of moving the head
 *
 * The reader can only swap the head page when the HEAD flag is set
 * and no UPDATE is in progress; the writer sets UPDATE before moving
 * the head and clears it afterwards.  If a cmpxchg finds that the
 * pointer no longer references the expected page, the page has been
 * "moved" (RB_PAGE_MOVED) and the operation is retried.
 */
#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL


#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED		4UL
871
/*
 * rb_list_head - remove any bit
 */
875static struct list_head *rb_list_head(struct list_head *list)
876{
877 unsigned long val = (unsigned long)list;
878
879 return (struct list_head *)(val & ~RB_FLAG_MASK);
880}
881
/*
 * rb_is_head_page - test if the given page is the head page
 *
 * Because the reader may move the head_page pointer, we can
 * not trust what the head page is (it may be pointing to
 * the reader page). But if the next page is a header page,
 * its flags will be non zero.
 */
890static inline int
891rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
892 struct buffer_page *page, struct list_head *list)
893{
894 unsigned long val;
895
896 val = (unsigned long)list->next;
897
898 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
899 return RB_PAGE_MOVED;
900
901 return val & RB_FLAG_MASK;
902}
903
/*
 * rb_is_reader_page - check if the given page is the reader page
 *
 * The reader page is off the ring buffer: the page before it does
 * not link back to it through its next pointer.
 */
911static bool rb_is_reader_page(struct buffer_page *page)
912{
913 struct list_head *list = page->list.prev;
914
915 return rb_list_head(list->next) != &page->list;
916}
917
/*
 * rb_set_list_to_head - set a list_head to be pointing to head.
 */
921static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
922 struct list_head *list)
923{
924 unsigned long *ptr;
925
926 ptr = (unsigned long *)&list->next;
927 *ptr |= RB_PAGE_HEAD;
928 *ptr &= ~RB_PAGE_UPDATE;
929}
930
/*
 * rb_head_page_activate - sets up head page
 */
934static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
935{
936 struct buffer_page *head;
937
938 head = cpu_buffer->head_page;
939 if (!head)
940 return;
941
	/*
	 * Set the previous list pointer to have the HEAD flag.
	 */
945 rb_set_list_to_head(cpu_buffer, head->list.prev);
946}
947
948static void rb_list_head_clear(struct list_head *list)
949{
950 unsigned long *ptr = (unsigned long *)&list->next;
951
952 *ptr &= ~RB_FLAG_MASK;
953}
954
/*
 * rb_head_page_deactivate - clears head page ptr (for free list)
 */
958static void
959rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
960{
961 struct list_head *hd;

	/* Go through the whole list and clear any pointers found. */
964 rb_list_head_clear(cpu_buffer->pages);
965
966 list_for_each(hd, cpu_buffer->pages)
967 rb_list_head_clear(hd);
968}
969
970static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
971 struct buffer_page *head,
972 struct buffer_page *prev,
973 int old_flag, int new_flag)
974{
975 struct list_head *list;
976 unsigned long val = (unsigned long)&head->list;
977 unsigned long ret;
978
979 list = &prev->list;
980
981 val &= ~RB_FLAG_MASK;
982
983 ret = cmpxchg((unsigned long *)&list->next,
984 val | old_flag, val | new_flag);

	/* If the pointer itself changed, the reader moved the page */
987 if ((ret & ~RB_FLAG_MASK) != val)
988 return RB_PAGE_MOVED;
989
990 return ret & RB_FLAG_MASK;
991}
992
993static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
994 struct buffer_page *head,
995 struct buffer_page *prev,
996 int old_flag)
997{
998 return rb_head_page_set(cpu_buffer, head, prev,
999 old_flag, RB_PAGE_UPDATE);
1000}
1001
1002static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
1003 struct buffer_page *head,
1004 struct buffer_page *prev,
1005 int old_flag)
1006{
1007 return rb_head_page_set(cpu_buffer, head, prev,
1008 old_flag, RB_PAGE_HEAD);
1009}
1010
1011static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
1012 struct buffer_page *head,
1013 struct buffer_page *prev,
1014 int old_flag)
1015{
1016 return rb_head_page_set(cpu_buffer, head, prev,
1017 old_flag, RB_PAGE_NORMAL);
1018}
1019
1020static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
1021 struct buffer_page **bpage)
1022{
1023 struct list_head *p = rb_list_head((*bpage)->list.next);
1024
1025 *bpage = list_entry(p, struct buffer_page, list);
1026}
1027
1028static struct buffer_page *
1029rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
1030{
1031 struct buffer_page *head;
1032 struct buffer_page *page;
1033 struct list_head *list;
1034 int i;
1035
1036 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
1037 return NULL;
1038
1039
1040 list = cpu_buffer->pages;
1041 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
1042 return NULL;
1043
1044 page = head = cpu_buffer->head_page;
1045
1046
1047
1048
1049
1050
1051 for (i = 0; i < 3; i++) {
1052 do {
1053 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
1054 cpu_buffer->head_page = page;
1055 return page;
1056 }
1057 rb_inc_page(cpu_buffer, &page);
1058 } while (page != head);
1059 }
1060
1061 RB_WARN_ON(cpu_buffer, 1);
1062
1063 return NULL;
1064}
1065
1066static int rb_head_page_replace(struct buffer_page *old,
1067 struct buffer_page *new)
1068{
1069 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
1070 unsigned long val;
1071 unsigned long ret;
1072
1073 val = *ptr & ~RB_FLAG_MASK;
1074 val |= RB_PAGE_HEAD;
1075
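	/*
	 * The swap only succeeds if prev->next still references the old
	 * head page with the HEAD flag set; otherwise the head has moved.
	 */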
1076 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
1077
1078 return ret == val;
1079}
1080
/*
 * rb_tail_page_update - move the tail page forward
 */
1084static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
1085 struct buffer_page *tail_page,
1086 struct buffer_page *next_page)
1087{
1088 unsigned long old_entries;
1089 unsigned long old_write;
1090
	/*
	 * The tail page now needs to be moved forward.
	 *
	 * We need to reset the tail page, but without messing
	 * with possible erasing of data brought in by interrupts
	 * that have moved the tail page and are currently on it.
	 *
	 * We add a counter to the write field to denote this.
	 */
1100 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
1101 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
1102
1103 local_inc(&cpu_buffer->pages_touched);
	/*
	 * Just make sure we have seen our old_write and synchronize
	 * with any interrupts that come in.
	 */
1108 barrier();
1109
1110
1111
1112
1113
1114
1115 if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
1116
1117 unsigned long val = old_write & ~RB_WRITE_MASK;
1118 unsigned long eval = old_entries & ~RB_WRITE_MASK;

		/*
		 * This will only succeed if an interrupt did
		 * not come in and change it. In which case, we
		 * do not want to modify it.
		 *
		 * We add (void) to let the compiler know that we do not care
		 * about the return value of these functions. We use the
		 * cmpxchg to only update if an interrupt did not already
		 * do it for us. If the cmpxchg fails, we don't care.
		 */
1130 (void)local_cmpxchg(&next_page->write, old_write, val);
1131 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
1132
1133
1134
1135
1136
1137
1138 local_set(&next_page->page->commit, 0);

		/* Again, either we update tail_page or an interrupt does */
1141 (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
1142 }
1143}
1144
1145static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
1146 struct buffer_page *bpage)
1147{
1148 unsigned long val = (unsigned long)bpage;
1149
1150 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
1151 return 1;
1152
1153 return 0;
1154}
1155
/*
 * rb_check_list - make sure a pointer to a list has the last bits zero
 */
1159static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1160 struct list_head *list)
1161{
1162 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
1163 return 1;
1164 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
1165 return 1;
1166 return 0;
1167}
1168
/**
 * rb_check_pages - integrity check of buffer pages
 * @cpu_buffer: CPU buffer with pages to test
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted.
 */
1176static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
1177{
1178 struct list_head *head = cpu_buffer->pages;
1179 struct buffer_page *bpage, *tmp;
1180
	/* Reset the head page if it exists */
1182 if (cpu_buffer->head_page)
1183 rb_set_head_page(cpu_buffer);
1184
1185 rb_head_page_deactivate(cpu_buffer);
1186
1187 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
1188 return -1;
1189 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
1190 return -1;
1191
1192 if (rb_check_list(cpu_buffer, head))
1193 return -1;
1194
1195 list_for_each_entry_safe(bpage, tmp, head, list) {
1196 if (RB_WARN_ON(cpu_buffer,
1197 bpage->list.next->prev != &bpage->list))
1198 return -1;
1199 if (RB_WARN_ON(cpu_buffer,
1200 bpage->list.prev->next != &bpage->list))
1201 return -1;
1202 if (rb_check_list(cpu_buffer, &bpage->list))
1203 return -1;
1204 }
1205
1206 rb_head_page_activate(cpu_buffer);
1207
1208 return 0;
1209}
1210
1211static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
1212{
1213 struct buffer_page *bpage, *tmp;
1214 bool user_thread = current->mm != NULL;
1215 gfp_t mflags;
1216 long i;
1217
	/*
	 * Check if the available memory is there first.
	 * Note, si_mem_available() only gives us a rough estimate of available
	 * memory. It may not be accurate. But we don't care, we just want
	 * to prevent doing any allocation when it is obvious that it is
	 * not going to succeed.
	 */
1225 i = si_mem_available();
1226 if (i < nr_pages)
1227 return -ENOMEM;
1228
	/*
	 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
	 * gracefully without invoking oom-killer and the system is not
	 * destabilized.
	 */
1234 mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
1235
	/*
	 * If a user thread allocates too much, and si_mem_available()
	 * reports there's enough memory, even though there is not.
	 * Make sure the OOM killer kills this thread. This can happen
	 * even with RETRY_MAYFAIL because another task may be doing
	 * an allocation after this task has taken all memory.
	 * This is the task the OOM killer needs to take out during this
	 * loop, even if it was triggered by an allocation somewhere else.
	 */
1245 if (user_thread)
1246 set_current_oom_origin();
1247 for (i = 0; i < nr_pages; i++) {
1248 struct page *page;
1249
1250 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1251 mflags, cpu_to_node(cpu));
1252 if (!bpage)
1253 goto free_pages;
1254
1255 list_add(&bpage->list, pages);
1256
1257 page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
1258 if (!page)
1259 goto free_pages;
1260 bpage->page = page_address(page);
1261 rb_init_page(bpage->page);
1262
1263 if (user_thread && fatal_signal_pending(current))
1264 goto free_pages;
1265 }
1266 if (user_thread)
1267 clear_current_oom_origin();
1268
1269 return 0;
1270
1271free_pages:
1272 list_for_each_entry_safe(bpage, tmp, pages, list) {
1273 list_del_init(&bpage->list);
1274 free_buffer_page(bpage);
1275 }
1276 if (user_thread)
1277 clear_current_oom_origin();
1278
1279 return -ENOMEM;
1280}
1281
1282static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1283 unsigned long nr_pages)
1284{
1285 LIST_HEAD(pages);
1286
1287 WARN_ON(!nr_pages);
1288
1289 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1290 return -ENOMEM;
1291
	/*
	 * The ring buffer page list is a circular list that does not
	 * start and end with a list head. All page list items point to
	 * other pages.
	 */
1297 cpu_buffer->pages = pages.next;
1298 list_del(&pages);
1299
1300 cpu_buffer->nr_pages = nr_pages;
1301
1302 rb_check_pages(cpu_buffer);
1303
1304 return 0;
1305}
1306
1307static struct ring_buffer_per_cpu *
1308rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu)
1309{
1310 struct ring_buffer_per_cpu *cpu_buffer;
1311 struct buffer_page *bpage;
1312 struct page *page;
1313 int ret;
1314
1315 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1316 GFP_KERNEL, cpu_to_node(cpu));
1317 if (!cpu_buffer)
1318 return NULL;
1319
1320 cpu_buffer->cpu = cpu;
1321 cpu_buffer->buffer = buffer;
1322 raw_spin_lock_init(&cpu_buffer->reader_lock);
1323 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1324 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1325 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1326 init_completion(&cpu_buffer->update_done);
1327 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1328 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1329 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1330
1331 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1332 GFP_KERNEL, cpu_to_node(cpu));
1333 if (!bpage)
1334 goto fail_free_buffer;
1335
1336 rb_check_bpage(cpu_buffer, bpage);
1337
1338 cpu_buffer->reader_page = bpage;
1339 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1340 if (!page)
1341 goto fail_free_reader;
1342 bpage->page = page_address(page);
1343 rb_init_page(bpage->page);
1344
1345 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1346 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1347
1348 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1349 if (ret < 0)
1350 goto fail_free_reader;
1351
1352 cpu_buffer->head_page
1353 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1354 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1355
1356 rb_head_page_activate(cpu_buffer);
1357
1358 return cpu_buffer;
1359
1360 fail_free_reader:
1361 free_buffer_page(cpu_buffer->reader_page);
1362
1363 fail_free_buffer:
1364 kfree(cpu_buffer);
1365 return NULL;
1366}
1367
1368static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1369{
1370 struct list_head *head = cpu_buffer->pages;
1371 struct buffer_page *bpage, *tmp;
1372
1373 free_buffer_page(cpu_buffer->reader_page);
1374
1375 rb_head_page_deactivate(cpu_buffer);
1376
1377 if (head) {
1378 list_for_each_entry_safe(bpage, tmp, head, list) {
1379 list_del_init(&bpage->list);
1380 free_buffer_page(bpage);
1381 }
1382 bpage = list_entry(head, struct buffer_page, list);
1383 free_buffer_page(bpage);
1384 }
1385
1386 kfree(cpu_buffer);
1387}
1388
/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 * @key: ring buffer reader_lock_key.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */
1399struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1400 struct lock_class_key *key)
1401{
1402 struct ring_buffer *buffer;
1403 long nr_pages;
1404 int bsize;
1405 int cpu;
1406 int ret;
1407
1408
1409 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1410 GFP_KERNEL);
1411 if (!buffer)
1412 return NULL;
1413
1414 if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1415 goto fail_free_buffer;
1416
1417 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1418 buffer->flags = flags;
1419 buffer->clock = trace_clock_local;
1420 buffer->reader_lock_key = key;
1421
1422 init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1423 init_waitqueue_head(&buffer->irq_work.waiters);
1424
1425
1426 if (nr_pages < 2)
1427 nr_pages = 2;
1428
1429 buffer->cpus = nr_cpu_ids;
1430
1431 bsize = sizeof(void *) * nr_cpu_ids;
1432 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1433 GFP_KERNEL);
1434 if (!buffer->buffers)
1435 goto fail_free_cpumask;
1436
1437 cpu = raw_smp_processor_id();
1438 cpumask_set_cpu(cpu, buffer->cpumask);
1439 buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1440 if (!buffer->buffers[cpu])
1441 goto fail_free_buffers;
1442
1443 ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1444 if (ret < 0)
1445 goto fail_free_buffers;
1446
1447 mutex_init(&buffer->mutex);
1448
1449 return buffer;
1450
1451 fail_free_buffers:
1452 for_each_buffer_cpu(buffer, cpu) {
1453 if (buffer->buffers[cpu])
1454 rb_free_cpu_buffer(buffer->buffers[cpu]);
1455 }
1456 kfree(buffer->buffers);
1457
1458 fail_free_cpumask:
1459 free_cpumask_var(buffer->cpumask);
1460
1461 fail_free_buffer:
1462 kfree(buffer);
1463 return NULL;
1464}
1465EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
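
/*
 * Allocation sketch (illustrative only; "snapshot_buf" and the size used
 * below are hypothetical):
 *
 *	struct ring_buffer *snapshot_buf;
 *
 *	snapshot_buf = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
 *	if (!snapshot_buf)
 *		return -ENOMEM;
 */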
1466
/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */
1471void
1472ring_buffer_free(struct ring_buffer *buffer)
1473{
1474 int cpu;
1475
1476 cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1477
1478 for_each_buffer_cpu(buffer, cpu)
1479 rb_free_cpu_buffer(buffer->buffers[cpu]);
1480
1481 kfree(buffer->buffers);
1482 free_cpumask_var(buffer->cpumask);
1483
1484 kfree(buffer);
1485}
1486EXPORT_SYMBOL_GPL(ring_buffer_free);
1487
1488void ring_buffer_set_clock(struct ring_buffer *buffer,
1489 u64 (*clock)(void))
1490{
1491 buffer->clock = clock;
1492}
1493
1494void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
1495{
1496 buffer->time_stamp_abs = abs;
1497}
1498
1499bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
1500{
1501 return buffer->time_stamp_abs;
1502}
1503
1504static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1505
1506static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1507{
1508 return local_read(&bpage->entries) & RB_WRITE_MASK;
1509}
1510
1511static inline unsigned long rb_page_write(struct buffer_page *bpage)
1512{
1513 return local_read(&bpage->write) & RB_WRITE_MASK;
1514}
1515
1516static int
1517rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
1518{
1519 struct list_head *tail_page, *to_remove, *next_page;
1520 struct buffer_page *to_remove_page, *tmp_iter_page;
1521 struct buffer_page *last_page, *first_page;
1522 unsigned long nr_removed;
1523 unsigned long head_bit;
1524 int page_entries;
1525
1526 head_bit = 0;
1527
1528 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1529 atomic_inc(&cpu_buffer->record_disabled);

	/*
	 * We don't race with the readers since we have acquired the reader
	 * lock. We also don't race with writers after disabling recording.
	 * This makes it easy to figure out the first and the last page to be
	 * removed from the list. We unlink all the pages in between including
	 * the first and last pages. This is done in a busy loop so that we
	 * lose the least number of traces.
	 * The pages are freed after we restart recording and unlock readers.
	 */
1539 tail_page = &cpu_buffer->tail_page->list;
1540
	/*
	 * tail page might be on reader page, we remove the next page
	 * from the ring buffer
	 */
1545 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1546 tail_page = rb_list_head(tail_page->next);
1547 to_remove = tail_page;
1548
1549
1550 first_page = list_entry(rb_list_head(to_remove->next),
1551 struct buffer_page, list);
1552
1553 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1554 to_remove = rb_list_head(to_remove)->next;
1555 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1556 }
1557
1558 next_page = rb_list_head(to_remove)->next;
1559
1560
1561
1562
1563
1564
1565 tail_page->next = (struct list_head *)((unsigned long)next_page |
1566 head_bit);
1567 next_page = rb_list_head(next_page);
1568 next_page->prev = tail_page;
1569
1570
1571 cpu_buffer->pages = next_page;
1572
1573
1574 if (head_bit)
1575 cpu_buffer->head_page = list_entry(next_page,
1576 struct buffer_page, list);
1577
1578
1579
1580
1581
1582 cpu_buffer->read = 0;
1583
1584
1585 atomic_dec(&cpu_buffer->record_disabled);
1586 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1587
1588 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1589
1590
1591 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1592 list);
1593 tmp_iter_page = first_page;
1594
1595 do {
1596 cond_resched();
1597
1598 to_remove_page = tmp_iter_page;
1599 rb_inc_page(cpu_buffer, &tmp_iter_page);
1600
1601
1602 page_entries = rb_page_entries(to_remove_page);
1603 if (page_entries) {
1604
1605
1606
1607
1608
1609
1610 local_add(page_entries, &cpu_buffer->overrun);
1611 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1612 }
1613
1614
1615
1616
1617
1618 free_buffer_page(to_remove_page);
1619 nr_removed--;
1620
1621 } while (to_remove_page != last_page);
1622
1623 RB_WARN_ON(cpu_buffer, nr_removed);
1624
1625 return nr_removed == 0;
1626}
1627
1628static int
1629rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1630{
1631 struct list_head *pages = &cpu_buffer->new_pages;
1632 int retries, success;
1633
1634 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1635
	/*
	 * We are holding the reader lock, so the reader page won't be swapped
	 * in the ring buffer. Now we are racing with the writer trying to
	 * move head page and the tail page.
	 * We are going to adapt the reader page update process where:
	 * 1. We first splice the start and end of list of new pages between
	 *    the head page and its previous page.
	 * 2. We cmpxchg the prev_page->next to point from head page to the
	 *    first page of the list.
	 * 3. Finally, we update the head->prev to the end of list.
	 *
	 * We will try this process 10 times, to make sure that we don't keep
	 * spinning.
	 */
1649 retries = 10;
1650 success = 0;
1651 while (retries--) {
1652 struct list_head *head_page, *prev_page, *r;
1653 struct list_head *last_page, *first_page;
1654 struct list_head *head_page_with_bit;
1655
1656 head_page = &rb_set_head_page(cpu_buffer)->list;
1657 if (!head_page)
1658 break;
1659 prev_page = head_page->prev;
1660
1661 first_page = pages->next;
1662 last_page = pages->prev;
1663
1664 head_page_with_bit = (struct list_head *)
1665 ((unsigned long)head_page | RB_PAGE_HEAD);
1666
1667 last_page->next = head_page_with_bit;
1668 first_page->prev = prev_page;
1669
1670 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1671
1672 if (r == head_page_with_bit) {
1673
1674
1675
1676
1677
1678 head_page->prev = last_page;
1679 success = 1;
1680 break;
1681 }
1682 }
1683
1684 if (success)
1685 INIT_LIST_HEAD(pages);
1686
1687
1688
1689
1690 RB_WARN_ON(cpu_buffer, !success);
1691 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1692
1693
1694 if (!success) {
1695 struct buffer_page *bpage, *tmp;
1696 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1697 list) {
1698 list_del_init(&bpage->list);
1699 free_buffer_page(bpage);
1700 }
1701 }
1702 return success;
1703}
1704
1705static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1706{
1707 int success;
1708
1709 if (cpu_buffer->nr_pages_to_update > 0)
1710 success = rb_insert_pages(cpu_buffer);
1711 else
1712 success = rb_remove_pages(cpu_buffer,
1713 -cpu_buffer->nr_pages_to_update);
1714
1715 if (success)
1716 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1717}
1718
1719static void update_pages_handler(struct work_struct *work)
1720{
1721 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1722 struct ring_buffer_per_cpu, update_pages_work);
1723 rb_update_pages(cpu_buffer);
1724 complete(&cpu_buffer->update_done);
1725}
1726
/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns the page-aligned size on success and a negative errno on failure.
 */
1737int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1738 int cpu_id)
1739{
1740 struct ring_buffer_per_cpu *cpu_buffer;
1741 unsigned long nr_pages;
1742 int cpu, err = 0;
1743
1744
1745
1746
1747 if (!buffer)
1748 return size;
1749
1750
1751 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1752 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1753 return size;
1754
1755 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1756
1757
1758 if (nr_pages < 2)
1759 nr_pages = 2;
1760
1761 size = nr_pages * BUF_PAGE_SIZE;
1762
1763
1764
1765
1766
1767
1768 if (atomic_read(&buffer->resize_disabled))
1769 return -EBUSY;
1770
1771
1772 mutex_lock(&buffer->mutex);
1773
1774 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1775
1776 for_each_buffer_cpu(buffer, cpu) {
1777 cpu_buffer = buffer->buffers[cpu];
1778
1779 cpu_buffer->nr_pages_to_update = nr_pages -
1780 cpu_buffer->nr_pages;
1781
1782
1783
1784 if (cpu_buffer->nr_pages_to_update <= 0)
1785 continue;
1786
1787
1788
1789
1790 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1791 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1792 &cpu_buffer->new_pages, cpu)) {
1793
1794 err = -ENOMEM;
1795 goto out_err;
1796 }
1797 }
1798
1799 get_online_cpus();
1800
1801
1802
1803
1804
1805 for_each_buffer_cpu(buffer, cpu) {
1806 cpu_buffer = buffer->buffers[cpu];
1807 if (!cpu_buffer->nr_pages_to_update)
1808 continue;
1809
1810
1811 if (!cpu_online(cpu)) {
1812 rb_update_pages(cpu_buffer);
1813 cpu_buffer->nr_pages_to_update = 0;
1814 } else {
1815 schedule_work_on(cpu,
1816 &cpu_buffer->update_pages_work);
1817 }
1818 }
1819
1820
1821 for_each_buffer_cpu(buffer, cpu) {
1822 cpu_buffer = buffer->buffers[cpu];
1823 if (!cpu_buffer->nr_pages_to_update)
1824 continue;
1825
1826 if (cpu_online(cpu))
1827 wait_for_completion(&cpu_buffer->update_done);
1828 cpu_buffer->nr_pages_to_update = 0;
1829 }
1830
1831 put_online_cpus();
1832 } else {
1833
1834 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1835 goto out;
1836
1837 cpu_buffer = buffer->buffers[cpu_id];
1838
1839 if (nr_pages == cpu_buffer->nr_pages)
1840 goto out;
1841
1842 cpu_buffer->nr_pages_to_update = nr_pages -
1843 cpu_buffer->nr_pages;
1844
1845 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1846 if (cpu_buffer->nr_pages_to_update > 0 &&
1847 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1848 &cpu_buffer->new_pages, cpu_id)) {
1849 err = -ENOMEM;
1850 goto out_err;
1851 }
1852
1853 get_online_cpus();
1854
1855
1856 if (!cpu_online(cpu_id))
1857 rb_update_pages(cpu_buffer);
1858 else {
1859 schedule_work_on(cpu_id,
1860 &cpu_buffer->update_pages_work);
1861 wait_for_completion(&cpu_buffer->update_done);
1862 }
1863
1864 cpu_buffer->nr_pages_to_update = 0;
1865 put_online_cpus();
1866 }
1867
1868 out:
1869
1870
1871
1872
1873
1874
1875
1876 if (atomic_read(&buffer->record_disabled)) {
1877 atomic_inc(&buffer->record_disabled);
1878
1879
1880
1881
1882
1883
1884 synchronize_rcu();
1885 for_each_buffer_cpu(buffer, cpu) {
1886 cpu_buffer = buffer->buffers[cpu];
1887 rb_check_pages(cpu_buffer);
1888 }
1889 atomic_dec(&buffer->record_disabled);
1890 }
1891
1892 mutex_unlock(&buffer->mutex);
1893 return size;
1894
1895 out_err:
1896 for_each_buffer_cpu(buffer, cpu) {
1897 struct buffer_page *bpage, *tmp;
1898
1899 cpu_buffer = buffer->buffers[cpu];
1900 cpu_buffer->nr_pages_to_update = 0;
1901
1902 if (list_empty(&cpu_buffer->new_pages))
1903 continue;
1904
1905 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1906 list) {
1907 list_del_init(&bpage->list);
1908 free_buffer_page(bpage);
1909 }
1910 }
1911 mutex_unlock(&buffer->mutex);
1912 return err;
1913}
1914EXPORT_SYMBOL_GPL(ring_buffer_resize);
1915
1916void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1917{
1918 mutex_lock(&buffer->mutex);
1919 if (val)
1920 buffer->flags |= RB_FL_OVERWRITE;
1921 else
1922 buffer->flags &= ~RB_FL_OVERWRITE;
1923 mutex_unlock(&buffer->mutex);
1924}
1925EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1926
1927static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1928{
1929 return bpage->page->data + index;
1930}
1931
1932static __always_inline struct ring_buffer_event *
1933rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1934{
1935 return __rb_page_index(cpu_buffer->reader_page,
1936 cpu_buffer->reader_page->read);
1937}
1938
1939static __always_inline struct ring_buffer_event *
1940rb_iter_head_event(struct ring_buffer_iter *iter)
1941{
1942 return __rb_page_index(iter->head_page, iter->head);
1943}
1944
1945static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
1946{
1947 return local_read(&bpage->page->commit);
1948}
1949
1950
1951static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
1952{
1953 return rb_page_commit(bpage);
1954}
1955
1956static __always_inline unsigned
1957rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1958{
1959 return rb_page_commit(cpu_buffer->commit_page);
1960}
1961
1962static __always_inline unsigned
1963rb_event_index(struct ring_buffer_event *event)
1964{
1965 unsigned long addr = (unsigned long)event;
1966
1967 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1968}
1969
1970static void rb_inc_iter(struct ring_buffer_iter *iter)
1971{
1972 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1973
1974
1975
1976
1977
1978
1979
1980 if (iter->head_page == cpu_buffer->reader_page)
1981 iter->head_page = rb_set_head_page(cpu_buffer);
1982 else
1983 rb_inc_page(cpu_buffer, &iter->head_page);
1984
1985 iter->read_stamp = iter->head_page->page->time_stamp;
1986 iter->head = 0;
1987}
1988
/*
 * rb_handle_head_page - writer hit the head page
 *
 * Returns: +1 to retry page
 *           0 to continue
 *          -1 on error
 */
1996static int
1997rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1998 struct buffer_page *tail_page,
1999 struct buffer_page *next_page)
2000{
2001 struct buffer_page *new_head;
2002 int entries;
2003 int type;
2004 int ret;
2005
2006 entries = rb_page_entries(next_page);
2007
2008
2009
2010
2011
2012
2013 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
2014 RB_PAGE_HEAD);
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027 switch (type) {
2028 case RB_PAGE_HEAD:
2029
2030
2031
2032
2033
2034 local_add(entries, &cpu_buffer->overrun);
2035 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
2036
2037
2038
2039
2040
2041
2042
2043 break;
2044
2045 case RB_PAGE_UPDATE:
2046
2047
2048
2049
2050 break;
2051 case RB_PAGE_NORMAL:
2052
2053
2054
2055
2056
2057 return 1;
2058 case RB_PAGE_MOVED:
2059
2060
2061
2062
2063
2064 return 1;
2065 default:
2066 RB_WARN_ON(cpu_buffer, 1);
2067 return -1;
2068 }
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084 new_head = next_page;
2085 rb_inc_page(cpu_buffer, &new_head);
2086
2087 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
2088 RB_PAGE_NORMAL);
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098 switch (ret) {
2099 case RB_PAGE_HEAD:
2100 case RB_PAGE_NORMAL:
2101
2102 break;
2103 default:
2104 RB_WARN_ON(cpu_buffer, 1);
2105 return -1;
2106 }
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118 if (ret == RB_PAGE_NORMAL) {
2119 struct buffer_page *buffer_tail_page;
2120
2121 buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
2122
2123
2124
2125
2126 if (buffer_tail_page != tail_page &&
2127 buffer_tail_page != next_page)
2128 rb_head_page_set_normal(cpu_buffer, new_head,
2129 next_page,
2130 RB_PAGE_HEAD);
2131 }
2132
2133
2134
2135
2136
2137
2138 if (type == RB_PAGE_HEAD) {
2139 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2140 tail_page,
2141 RB_PAGE_UPDATE);
2142 if (RB_WARN_ON(cpu_buffer,
2143 ret != RB_PAGE_UPDATE))
2144 return -1;
2145 }
2146
2147 return 0;
2148}
2149
2150static inline void
2151rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2152 unsigned long tail, struct rb_event_info *info)
2153{
2154 struct buffer_page *tail_page = info->tail_page;
2155 struct ring_buffer_event *event;
2156 unsigned long length = info->length;
2157
	/*
	 * Only the event that crossed the page boundary
	 * must fill the old tail_page with padding.
	 */
2162 if (tail >= BUF_PAGE_SIZE) {
		/*
		 * If the page was filled, then we still need
		 * to update the real_end. Reset it to zero
		 * and the reader will ignore it.
		 */
2168 if (tail == BUF_PAGE_SIZE)
2169 tail_page->real_end = 0;
2170
2171 local_sub(length, &tail_page->write);
2172 return;
2173 }
2174
2175 event = __rb_page_index(tail_page, tail);
2176
2177
2178 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2179
2180
2181
2182
2183
2184
2185 tail_page->real_end = tail;
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2199
2200
2201
2202 rb_event_set_padding(event);
2203
2204
2205 local_sub(length, &tail_page->write);
2206 return;
2207 }
2208
2209
2210 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2211 event->type_len = RINGBUF_TYPE_PADDING;
2212
2213 event->time_delta = 1;
2214
2215
2216 length = (tail + length) - BUF_PAGE_SIZE;
2217 local_sub(length, &tail_page->write);
2218}
2219
2220static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
2221
/*
 * This is the slow path, force gcc not to inline it.
 */
2225static noinline struct ring_buffer_event *
2226rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2227 unsigned long tail, struct rb_event_info *info)
2228{
2229 struct buffer_page *tail_page = info->tail_page;
2230 struct buffer_page *commit_page = cpu_buffer->commit_page;
2231 struct ring_buffer *buffer = cpu_buffer->buffer;
2232 struct buffer_page *next_page;
2233 int ret;
2234
2235 next_page = tail_page;
2236
2237 rb_inc_page(cpu_buffer, &next_page);
2238
	/*
	 * If the tail has caught up to the commit page, the buffer is
	 * completely full of uncommitted (or unread) data: count a
	 * commit overrun and drop this event.
	 */
2244 if (unlikely(next_page == commit_page)) {
2245 local_inc(&cpu_buffer->commit_overrun);
2246 goto out_reset;
2247 }
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2264
2265
2266
2267
2268
2269 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2270
2271
2272
2273
2274 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2275 local_inc(&cpu_buffer->dropped_events);
2276 goto out_reset;
2277 }
2278
2279 ret = rb_handle_head_page(cpu_buffer,
2280 tail_page,
2281 next_page);
2282 if (ret < 0)
2283 goto out_reset;
2284 if (ret)
2285 goto out_again;
2286 } else {
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297 if (unlikely((cpu_buffer->commit_page !=
2298 cpu_buffer->tail_page) &&
2299 (cpu_buffer->commit_page ==
2300 cpu_buffer->reader_page))) {
2301 local_inc(&cpu_buffer->commit_overrun);
2302 goto out_reset;
2303 }
2304 }
2305 }
2306
2307 rb_tail_page_update(cpu_buffer, tail_page, next_page);
2308
2309 out_again:
2310
2311 rb_reset_tail(cpu_buffer, tail, info);
2312
	/* Commit what we have for now. */
	rb_end_commit(cpu_buffer);
	/* rb_end_commit() decs committing */
	local_inc(&cpu_buffer->committing);

	/* fail and let the caller try again */
	return ERR_PTR(-EAGAIN);

 out_reset:
	/* reset write */
	rb_reset_tail(cpu_buffer, tail, info);
2324
2325 return NULL;
2326}
2327
/* Slow path, do not inline */
2329static noinline struct ring_buffer_event *
2330rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
2331{
2332 if (abs)
2333 event->type_len = RINGBUF_TYPE_TIME_STAMP;
2334 else
2335 event->type_len = RINGBUF_TYPE_TIME_EXTEND;

	/* Not the first event on the page, or not delta? */
2338 if (abs || rb_event_index(event)) {
2339 event->time_delta = delta & TS_MASK;
2340 event->array[0] = delta >> TS_SHIFT;
2341 } else {
2342
2343 event->time_delta = 0;
2344 event->array[0] = 0;
2345 }
2346
2347 return skip_time_extend(event);
2348}
2349
2350static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2351 struct ring_buffer_event *event);
2352

/**
 * rb_update_event - update event type and data
 * @cpu_buffer: The per cpu buffer of the @event
 * @event: the event to update
 * @info: The info to update the @event with (contains length and delta)
 *
 * Update the type and data fields of the @event. The length
 * of the event must already have been calculated.
 */
2364static void
2365rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2366 struct ring_buffer_event *event,
2367 struct rb_event_info *info)
2368{
2369 unsigned length = info->length;
2370 u64 delta = info->delta;
2371
2372
2373 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
2374 delta = 0;
2375
2376
2377
2378
2379
2380 if (unlikely(info->add_timestamp)) {
2381 bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
2382
2383 event = rb_add_time_stamp(event, info->delta, abs);
2384 length -= RB_LEN_TIME_EXTEND;
2385 delta = 0;
2386 }
2387
2388 event->time_delta = delta;
2389 length -= RB_EVNT_HDR_SIZE;
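	/*
	 * Small events encode their length in type_len (in RB_ALIGNMENT
	 * units); larger or force-aligned events set type_len to 0 and
	 * store the length in array[0].
	 */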
2390 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2391 event->type_len = 0;
2392 event->array[0] = length;
2393 } else
2394 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2395}
2396
2397static unsigned rb_calculate_event_length(unsigned length)
2398{
2399 struct ring_buffer_event event;
2400
2401
2402 if (!length)
2403 length++;
2404
2405 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2406 length += sizeof(event.array[0]);
2407
2408 length += RB_EVNT_HDR_SIZE;
2409 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2410
	/*
	 * An event may later be turned into a time-extend plus padding
	 * when it is discarded.  An event whose total size is exactly
	 * RB_LEN_TIME_EXTEND + RB_ALIGNMENT cannot be split that way
	 * (the leftover would be smaller than the minimum event), so
	 * bump its length by one alignment unit.
	 */
2423 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2424 length += RB_ALIGNMENT;
2425
2426 return length;
2427}
2428
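/*
 * When the arch does not select CONFIG_HAVE_UNSTABLE_SCHED_CLOCK the
 * scheduler clock is always considered stable, so provide a stub for
 * the warning message in rb_handle_timestamp() below.
 */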
2429#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2430static inline bool sched_clock_stable(void)
2431{
2432 return true;
2433}
2434#endif
2435
2436static inline int
2437rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2438 struct ring_buffer_event *event)
2439{
2440 unsigned long new_index, old_index;
2441 struct buffer_page *bpage;
2442 unsigned long index;
2443 unsigned long addr;
2444
2445 new_index = rb_event_index(event);
2446 old_index = new_index + rb_event_ts_length(event);
2447 addr = (unsigned long)event;
2448 addr &= PAGE_MASK;
2449
2450 bpage = READ_ONCE(cpu_buffer->tail_page);
2451
2452 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2453 unsigned long write_mask =
2454 local_read(&bpage->write) & ~RB_WRITE_MASK;
2455 unsigned long event_length = rb_event_length(event);
2456
2457
2458
2459
2460
2461
2462 old_index += write_mask;
2463 new_index += write_mask;
2464 index = local_cmpxchg(&bpage->write, old_index, new_index);
2465 if (index == old_index) {
2466
2467 local_sub(event_length, &cpu_buffer->entries_bytes);
2468 return 1;
2469 }
2470 }
2471
2472
2473 return 0;
2474}
2475
2476static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2477{
2478 local_inc(&cpu_buffer->committing);
2479 local_inc(&cpu_buffer->commits);
2480}
2481
2482static __always_inline void
2483rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2484{
2485 unsigned long max_count;
2486
	/*
	 * We only race with interrupts and NMIs on this CPU.
	 * If we own the commit event, then we can commit
	 * all others that interrupted us, since the interruptions
	 * are in stack format (they finish before they come
	 * back to us). This allows us to do a simple loop to
	 * iterate over all the events that interrupted us.
	 */
2495 again:
2496 max_count = cpu_buffer->nr_pages * 100;
2497
2498 while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2499 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2500 return;
2501 if (RB_WARN_ON(cpu_buffer,
2502 rb_is_reader_page(cpu_buffer->tail_page)))
2503 return;
2504 local_set(&cpu_buffer->commit_page->page->commit,
2505 rb_page_write(cpu_buffer->commit_page));
2506 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
2507
2508 if (rb_page_write(cpu_buffer->commit_page))
2509 cpu_buffer->write_stamp =
2510 cpu_buffer->commit_page->page->time_stamp;
2511
2512 barrier();
2513 }
2514 while (rb_commit_index(cpu_buffer) !=
2515 rb_page_write(cpu_buffer->commit_page)) {
2516
2517 local_set(&cpu_buffer->commit_page->page->commit,
2518 rb_page_write(cpu_buffer->commit_page));
2519 RB_WARN_ON(cpu_buffer,
2520 local_read(&cpu_buffer->commit_page->page->commit) &
2521 ~RB_WRITE_MASK);
2522 barrier();
2523 }
2524
2525
2526 barrier();
2527
2528
2529
2530
2531
2532
2533 if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
2534 goto again;
2535}
2536
2537static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2538{
2539 unsigned long commits;
2540
2541 if (RB_WARN_ON(cpu_buffer,
2542 !local_read(&cpu_buffer->committing)))
2543 return;
2544
2545 again:
2546 commits = local_read(&cpu_buffer->commits);
2547
2548 barrier();
2549 if (local_read(&cpu_buffer->committing) == 1)
2550 rb_set_commit_to_write(cpu_buffer);
2551
2552 local_dec(&cpu_buffer->committing);
2553
2554
2555 barrier();
2556
2557
2558
2559
2560
2561
2562 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2563 !local_read(&cpu_buffer->committing)) {
2564 local_inc(&cpu_buffer->committing);
2565 goto again;
2566 }
2567}
2568
2569static inline void rb_event_discard(struct ring_buffer_event *event)
2570{
2571 if (extended_time(event))
2572 event = skip_time_extend(event);
2573
2574
2575 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2576 event->type_len = RINGBUF_TYPE_PADDING;
2577
2578 if (!event->time_delta)
2579 event->time_delta = 1;
2580}
2581
2582static __always_inline bool
2583rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
2584 struct ring_buffer_event *event)
2585{
2586 unsigned long addr = (unsigned long)event;
2587 unsigned long index;
2588
2589 index = rb_event_index(event);
2590 addr &= PAGE_MASK;
2591
2592 return cpu_buffer->commit_page->page == (void *)addr &&
2593 rb_commit_index(cpu_buffer) == index;
2594}
2595
2596static __always_inline void
2597rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2598 struct ring_buffer_event *event)
2599{
2600 u64 delta;
2601
2602
2603
2604
2605
2606 if (rb_event_is_commit(cpu_buffer, event)) {
2607
2608
2609
2610
2611 if (!rb_event_index(event))
2612 cpu_buffer->write_stamp =
2613 cpu_buffer->commit_page->page->time_stamp;
2614 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2615 delta = ring_buffer_event_time_stamp(event);
2616 cpu_buffer->write_stamp += delta;
2617 } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
2618 delta = ring_buffer_event_time_stamp(event);
2619 cpu_buffer->write_stamp = delta;
2620 } else
2621 cpu_buffer->write_stamp += event->time_delta;
2622 }
2623}
2624
2625static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2626 struct ring_buffer_event *event)
2627{
2628 local_inc(&cpu_buffer->entries);
2629 rb_update_write_stamp(cpu_buffer, event);
2630 rb_end_commit(cpu_buffer);
2631}
2632
2633static __always_inline void
2634rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
2635{
2636 size_t nr_pages;
2637 size_t dirty;
2638 size_t full;
2639
2640 if (buffer->irq_work.waiters_pending) {
2641 buffer->irq_work.waiters_pending = false;
2642
2643 irq_work_queue(&buffer->irq_work.work);
2644 }
2645
2646 if (cpu_buffer->irq_work.waiters_pending) {
2647 cpu_buffer->irq_work.waiters_pending = false;
2648
2649 irq_work_queue(&cpu_buffer->irq_work.work);
2650 }
2651
2652 if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
2653 return;
2654
2655 if (cpu_buffer->reader_page == cpu_buffer->commit_page)
2656 return;
2657
2658 if (!cpu_buffer->irq_work.full_waiters_pending)
2659 return;
2660
2661 cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
2662
2663 full = cpu_buffer->shortest_full;
2664 nr_pages = cpu_buffer->nr_pages;
2665 dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
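	/* Only wake the full waiters once at least shortest_full percent is dirty */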
2666 if (full && nr_pages && (dirty * 100) <= full * nr_pages)
2667 return;
2668
2669 cpu_buffer->irq_work.wakeup_full = true;
2670 cpu_buffer->irq_work.full_waiters_pending = false;
2671
2672 irq_work_queue(&cpu_buffer->irq_work.work);
2673}
2674
/*
 * The lock and unlock are done within a preempt disable section.
 * The current_context per-CPU variable can only be modified
 * by the current task between lock and unlock. But it can
 * be modified more than once via an interrupt. To pass this
 * information from the lock to the unlock without having to
 * read the preempt count again, a bitmask of the context is
 * kept in current_context:
 *
 *  bit 0 =  NMI context
 *  bit 1 =  IRQ context
 *  bit 2 =  SoftIRQ context
 *  bit 3 =  normal context
 *
 * This works because this is the order of contexts that can
 * preempt other contexts. A SoftIRQ never preempts an IRQ
 * context.
 *
 * When the context is determined, the corresponding bit is
 * checked and set (if it was already set, a recursion in that
 * context is detected and the write is rejected).
 *
 * On unlock, the innermost context is the lowest set bit within
 * the current nest window, and
 *	current_context &= current_context - (1 << nest)
 * clears exactly that bit.
 */
2713static __always_inline int
2714trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
2715{
2716 unsigned int val = cpu_buffer->current_context;
2717 unsigned long pc = preempt_count();
2718 int bit;
2719
2720 if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
2721 bit = RB_CTX_NORMAL;
2722 else
2723 bit = pc & NMI_MASK ? RB_CTX_NMI :
2724 pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
2725
2726 if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
2727 return 1;
2728
2729 val |= (1 << (bit + cpu_buffer->nest));
2730 cpu_buffer->current_context = val;
2731
2732 return 0;
2733}
2734
2735static __always_inline void
2736trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
2737{
2738 cpu_buffer->current_context &=
2739 cpu_buffer->current_context - (1 << cpu_buffer->nest);
2740}
2741
/* ring_buffer_nest_start() shifts to a fresh set of the 4 context bits above */
2743#define NESTED_BITS 4

/**
 * ring_buffer_nest_start - Allow to trace while nested
 * @buffer: The ring buffer to modify
 *
 * The ring buffer has a safety mechanism to prevent recursion.
 * But there may be a case where a trace needs to be done while
 * tracing something else. In this case, calling this function
 * will allow this function to nest within a currently active
 * ring_buffer_lock_reserve().
 *
 * Call this function before calling another ring_buffer_lock_reserve() and
 * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
 */
2758void ring_buffer_nest_start(struct ring_buffer *buffer)
2759{
2760 struct ring_buffer_per_cpu *cpu_buffer;
2761 int cpu;
2762
2763
2764 preempt_disable_notrace();
2765 cpu = raw_smp_processor_id();
2766 cpu_buffer = buffer->buffers[cpu];
2767
2768 cpu_buffer->nest += NESTED_BITS;
2769}

/**
 * ring_buffer_nest_end - Finish tracing while nested
 * @buffer: The ring buffer to modify
 *
 * Must be called after ring_buffer_nest_start() and after the
 * nested ring_buffer_unlock_commit().
 */
2778void ring_buffer_nest_end(struct ring_buffer *buffer)
2779{
2780 struct ring_buffer_per_cpu *cpu_buffer;
2781 int cpu;
2782
2783
2784 cpu = raw_smp_processor_id();
2785 cpu_buffer = buffer->buffers[cpu];
2786
2787 cpu_buffer->nest -= NESTED_BITS;
2788 preempt_enable_notrace();
2789}
2790
/**
 * ring_buffer_unlock_commit - commit a reserved
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */
2800int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2801 struct ring_buffer_event *event)
2802{
2803 struct ring_buffer_per_cpu *cpu_buffer;
2804 int cpu = raw_smp_processor_id();
2805
2806 cpu_buffer = buffer->buffers[cpu];
2807
2808 rb_commit(cpu_buffer, event);
2809
2810 rb_wakeups(buffer, cpu_buffer);
2811
2812 trace_recursive_unlock(cpu_buffer);
2813
2814 preempt_enable_notrace();
2815
2816 return 0;
2817}
2818EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2819
2820static noinline void
2821rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2822 struct rb_event_info *info)
2823{
2824 WARN_ONCE(info->delta > (1ULL << 59),
2825 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2826 (unsigned long long)info->delta,
2827 (unsigned long long)info->ts,
2828 (unsigned long long)cpu_buffer->write_stamp,
2829 sched_clock_stable() ? "" :
2830 "If you just came from a suspend/resume,\n"
2831 "please switch to the trace global clock:\n"
2832 " echo global > /sys/kernel/debug/tracing/trace_clock\n"
2833 "or add trace_clock=global to the kernel command line\n");
2834 info->add_timestamp = 1;
2835}
2836
2837static struct ring_buffer_event *
2838__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2839 struct rb_event_info *info)
2840{
2841 struct ring_buffer_event *event;
2842 struct buffer_page *tail_page;
2843 unsigned long tail, write;
2844
	/*
	 * If the time delta since the last event is too big to
	 * hold in the time field of the event, then we append a
	 * TIME EXTEND event ahead of the data event.
	 */
2850 if (unlikely(info->add_timestamp))
2851 info->length += RB_LEN_TIME_EXTEND;

	/* Don't let the compiler play games with cpu_buffer->tail_page */
2854 tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
2855 write = local_add_return(info->length, &tail_page->write);
2856
2857
2858 write &= RB_WRITE_MASK;
2859 tail = write - info->length;
2860
	/*
	 * If this is the first commit on the page, then it has the same
	 * timestamp as the page itself.
	 */
2865 if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
2866 info->delta = 0;
2867
2868
2869 if (unlikely(write > BUF_PAGE_SIZE))
2870 return rb_move_tail(cpu_buffer, tail, info);
2871
2872
2873
2874 event = __rb_page_index(tail_page, tail);
2875 rb_update_event(cpu_buffer, event, info);
2876
2877 local_inc(&tail_page->entries);
2878
	/*
	 * If this is the first commit on the page, then update
	 * its timestamp.
	 */
2883 if (!tail)
2884 tail_page->page->time_stamp = info->ts;
2885
2886
2887 local_add(info->length, &cpu_buffer->entries_bytes);
2888
2889 return event;
2890}
2891
2892static __always_inline struct ring_buffer_event *
2893rb_reserve_next_event(struct ring_buffer *buffer,
2894 struct ring_buffer_per_cpu *cpu_buffer,
2895 unsigned long length)
2896{
2897 struct ring_buffer_event *event;
2898 struct rb_event_info info;
2899 int nr_loops = 0;
2900 u64 diff;
2901
2902 rb_start_commit(cpu_buffer);
2903
2904#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2905
2906
2907
2908
2909
2910
2911 barrier();
2912 if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
2913 local_dec(&cpu_buffer->committing);
2914 local_dec(&cpu_buffer->commits);
2915 return NULL;
2916 }
2917#endif
2918
2919 info.length = rb_calculate_event_length(length);
2920 again:
2921 info.add_timestamp = 0;
2922 info.delta = 0;
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2934 goto out_fail;
2935
2936 info.ts = rb_time_stamp(cpu_buffer->buffer);
2937 diff = info.ts - cpu_buffer->write_stamp;
2938
2939
2940 barrier();
2941
2942 if (ring_buffer_time_stamp_abs(buffer)) {
2943 info.delta = info.ts;
2944 rb_handle_timestamp(cpu_buffer, &info);
2945 } else
2946 if (likely(info.ts >= cpu_buffer->write_stamp)) {
2947 info.delta = diff;
2948 if (unlikely(test_time_stamp(info.delta)))
2949 rb_handle_timestamp(cpu_buffer, &info);
2950 }
2951
2952 event = __rb_reserve_next(cpu_buffer, &info);
2953
2954 if (unlikely(PTR_ERR(event) == -EAGAIN)) {
2955 if (info.add_timestamp)
2956 info.length -= RB_LEN_TIME_EXTEND;
2957 goto again;
2958 }
2959
2960 if (!event)
2961 goto out_fail;
2962
2963 return event;
2964
2965 out_fail:
2966 rb_end_commit(cpu_buffer);
2967 return NULL;
2968}
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
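/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
 * If NULL is returned, then nothing has been allocated or reserved.
 */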
2985struct ring_buffer_event *
2986ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2987{
2988 struct ring_buffer_per_cpu *cpu_buffer;
2989 struct ring_buffer_event *event;
2990 int cpu;
2991
2992
2993 preempt_disable_notrace();
2994
2995 if (unlikely(atomic_read(&buffer->record_disabled)))
2996 goto out;
2997
2998 cpu = raw_smp_processor_id();
2999
3000 if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
3001 goto out;
3002
3003 cpu_buffer = buffer->buffers[cpu];
3004
3005 if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
3006 goto out;
3007
3008 if (unlikely(length > BUF_MAX_DATA_SIZE))
3009 goto out;
3010
3011 if (unlikely(trace_recursive_lock(cpu_buffer)))
3012 goto out;
3013
3014 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3015 if (!event)
3016 goto out_unlock;
3017
3018 return event;
3019
3020 out_unlock:
3021 trace_recursive_unlock(cpu_buffer);
3022 out:
3023 preempt_enable_notrace();
3024 return NULL;
3025}
3026EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
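
/*
 * Typical use of the reserve/commit API. This is an illustrative sketch
 * only; "struct my_entry" and the surrounding caller are hypothetical and
 * not part of this file:
 *
 *	struct ring_buffer_event *event;
 *	struct my_entry *entry;
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->value = 42;
 *	ring_buffer_unlock_commit(buffer, event);
 *
 * If NULL is returned, nothing was reserved and nothing must be committed.
 */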
3027
3028
3029
3030
3031
3032
3033
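/*
 * Decrement the entries to the page that an event is on.
 * The event does not even need to exist, only the pointer
 * to the page it is on. This may only be called before the commit
 * takes place.
 */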
3034static inline void
3035rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
3036 struct ring_buffer_event *event)
3037{
3038 unsigned long addr = (unsigned long)event;
3039 struct buffer_page *bpage = cpu_buffer->commit_page;
3040 struct buffer_page *start;
3041
3042 addr &= PAGE_MASK;
3043
3044
3045 if (likely(bpage->page == (void *)addr)) {
3046 local_dec(&bpage->entries);
3047 return;
3048 }
3049
3050
3051
3052
3053
3054 rb_inc_page(cpu_buffer, &bpage);
3055 start = bpage;
3056 do {
3057 if (bpage->page == (void *)addr) {
3058 local_dec(&bpage->entries);
3059 return;
3060 }
3061 rb_inc_page(cpu_buffer, &bpage);
3062 } while (bpage != start);
3063
3064
3065 RB_WARN_ON(cpu_buffer, 1);
3066}
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
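/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * Sometimes an event that is in the ring buffer needs to be ignored.
 * This can only be done if the event has not been committed yet.
 * This function will try to remove the event from the ring buffer
 * completely if no other event has been added behind it; otherwise the
 * event is turned into padding and the commit is performed anyway.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */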
3087void ring_buffer_discard_commit(struct ring_buffer *buffer,
3088 struct ring_buffer_event *event)
3089{
3090 struct ring_buffer_per_cpu *cpu_buffer;
3091 int cpu;
3092
3093
3094 rb_event_discard(event);
3095
3096 cpu = smp_processor_id();
3097 cpu_buffer = buffer->buffers[cpu];
3098
3099
3100
3101
3102
3103
3104 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
3105
3106 rb_decrement_entry(cpu_buffer, event);
3107 if (rb_try_to_discard(cpu_buffer, event))
3108 goto out;
3109
3110
3111
3112
3113
3114 rb_update_write_stamp(cpu_buffer, event);
3115 out:
3116 rb_end_commit(cpu_buffer);
3117
3118 trace_recursive_unlock(cpu_buffer);
3119
3120 preempt_enable_notrace();
3121
3122}
3123EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
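/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 *
 * Note, like ring_buffer_lock_reserve, the length is the length of the data
 * and not the length of the event which would hold the header.
 *
 * Returns 0 on success, or -EBUSY if the write could not be done.
 */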
3138int ring_buffer_write(struct ring_buffer *buffer,
3139 unsigned long length,
3140 void *data)
3141{
3142 struct ring_buffer_per_cpu *cpu_buffer;
3143 struct ring_buffer_event *event;
3144 void *body;
3145 int ret = -EBUSY;
3146 int cpu;
3147
3148 preempt_disable_notrace();
3149
3150 if (atomic_read(&buffer->record_disabled))
3151 goto out;
3152
3153 cpu = raw_smp_processor_id();
3154
3155 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3156 goto out;
3157
3158 cpu_buffer = buffer->buffers[cpu];
3159
3160 if (atomic_read(&cpu_buffer->record_disabled))
3161 goto out;
3162
3163 if (length > BUF_MAX_DATA_SIZE)
3164 goto out;
3165
3166 if (unlikely(trace_recursive_lock(cpu_buffer)))
3167 goto out;
3168
3169 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3170 if (!event)
3171 goto out_unlock;
3172
3173 body = rb_event_data(event);
3174
3175 memcpy(body, data, length);
3176
3177 rb_commit(cpu_buffer, event);
3178
3179 rb_wakeups(buffer, cpu_buffer);
3180
3181 ret = 0;
3182
3183 out_unlock:
3184 trace_recursive_unlock(cpu_buffer);
3185
3186 out:
3187 preempt_enable_notrace();
3188
3189 return ret;
3190}
3191EXPORT_SYMBOL_GPL(ring_buffer_write);
3192
3193static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3194{
3195 struct buffer_page *reader = cpu_buffer->reader_page;
3196 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3197 struct buffer_page *commit = cpu_buffer->commit_page;
3198
3199
3200 if (unlikely(!head))
3201 return true;
3202
3203 return reader->read == rb_page_commit(reader) &&
3204 (commit == reader ||
3205 (commit == head &&
3206 head->read == rb_page_commit(commit)));
3207}
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
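/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * Callers that must be sure no writer is still inside the buffer
 * typically follow this with synchronize_rcu(), as ring_buffer_reset_cpu()
 * does below.
 */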
3218void ring_buffer_record_disable(struct ring_buffer *buffer)
3219{
3220 atomic_inc(&buffer->record_disabled);
3221}
3222EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3223
3224
3225
3226
3227
3228
3229
3230
3231void ring_buffer_record_enable(struct ring_buffer *buffer)
3232{
3233 atomic_dec(&buffer->record_disabled);
3234}
3235EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
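/**
 * ring_buffer_record_off - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * This is different than ring_buffer_record_disable() as
 * it works like an on/off switch, whereas the disable() version
 * is a counter and must be paired with an enable().
 */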
3248void ring_buffer_record_off(struct ring_buffer *buffer)
3249{
3250 unsigned int rd;
3251 unsigned int new_rd;
3252
3253 do {
3254 rd = atomic_read(&buffer->record_disabled);
3255 new_rd = rd | RB_BUFFER_OFF;
3256 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3257}
3258EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271void ring_buffer_record_on(struct ring_buffer *buffer)
3272{
3273 unsigned int rd;
3274 unsigned int new_rd;
3275
3276 do {
3277 rd = atomic_read(&buffer->record_disabled);
3278 new_rd = rd & ~RB_BUFFER_OFF;
3279 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3280}
3281EXPORT_SYMBOL_GPL(ring_buffer_record_on);
3282
3283
3284
3285
3286
3287
3288
3289bool ring_buffer_record_is_on(struct ring_buffer *buffer)
3290{
3291 return !atomic_read(&buffer->record_disabled);
3292}
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305bool ring_buffer_record_is_set_on(struct ring_buffer *buffer)
3306{
3307 return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
3308}
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
3321{
3322 struct ring_buffer_per_cpu *cpu_buffer;
3323
3324 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3325 return;
3326
3327 cpu_buffer = buffer->buffers[cpu];
3328 atomic_inc(&cpu_buffer->record_disabled);
3329}
3330EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
3341{
3342 struct ring_buffer_per_cpu *cpu_buffer;
3343
3344 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3345 return;
3346
3347 cpu_buffer = buffer->buffers[cpu];
3348 atomic_dec(&cpu_buffer->record_disabled);
3349}
3350EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
3351
3352
3353
3354
3355
3356
3357
3358static inline unsigned long
3359rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
3360{
3361 return local_read(&cpu_buffer->entries) -
3362 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
3363}
3364
3365
3366
3367
3368
3369
3370u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
3371{
3372 unsigned long flags;
3373 struct ring_buffer_per_cpu *cpu_buffer;
3374 struct buffer_page *bpage;
3375 u64 ret = 0;
3376
3377 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3378 return 0;
3379
3380 cpu_buffer = buffer->buffers[cpu];
3381 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3382
3383
3384
3385
3386 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
3387 bpage = cpu_buffer->reader_page;
3388 else
3389 bpage = rb_set_head_page(cpu_buffer);
3390 if (bpage)
3391 ret = bpage->page->time_stamp;
3392 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3393
3394 return ret;
3395}
3396EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
3397
3398
3399
3400
3401
3402
3403unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
3404{
3405 struct ring_buffer_per_cpu *cpu_buffer;
3406 unsigned long ret;
3407
3408 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3409 return 0;
3410
3411 cpu_buffer = buffer->buffers[cpu];
3412 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
3413
3414 return ret;
3415}
3416EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
3417
3418
3419
3420
3421
3422
3423unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
3424{
3425 struct ring_buffer_per_cpu *cpu_buffer;
3426
3427 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3428 return 0;
3429
3430 cpu_buffer = buffer->buffers[cpu];
3431
3432 return rb_num_of_entries(cpu_buffer);
3433}
3434EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3435
3436
3437
3438
3439
3440
3441
3442unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
3443{
3444 struct ring_buffer_per_cpu *cpu_buffer;
3445 unsigned long ret;
3446
3447 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3448 return 0;
3449
3450 cpu_buffer = buffer->buffers[cpu];
3451 ret = local_read(&cpu_buffer->overrun);
3452
3453 return ret;
3454}
3455EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3456
3457
3458
3459
3460
3461
3462
3463
3464unsigned long
3465ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3466{
3467 struct ring_buffer_per_cpu *cpu_buffer;
3468 unsigned long ret;
3469
3470 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3471 return 0;
3472
3473 cpu_buffer = buffer->buffers[cpu];
3474 ret = local_read(&cpu_buffer->commit_overrun);
3475
3476 return ret;
3477}
3478EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3479
3480
3481
3482
3483
3484
3485
3486unsigned long
3487ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3488{
3489 struct ring_buffer_per_cpu *cpu_buffer;
3490 unsigned long ret;
3491
3492 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3493 return 0;
3494
3495 cpu_buffer = buffer->buffers[cpu];
3496 ret = local_read(&cpu_buffer->dropped_events);
3497
3498 return ret;
3499}
3500EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3501
3502
3503
3504
3505
3506
3507unsigned long
3508ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
3509{
3510 struct ring_buffer_per_cpu *cpu_buffer;
3511
3512 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3513 return 0;
3514
3515 cpu_buffer = buffer->buffers[cpu];
3516 return cpu_buffer->read;
3517}
3518EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
3519
3520
3521
3522
3523
3524
3525
3526
3527unsigned long ring_buffer_entries(struct ring_buffer *buffer)
3528{
3529 struct ring_buffer_per_cpu *cpu_buffer;
3530 unsigned long entries = 0;
3531 int cpu;
3532
3533
3534 for_each_buffer_cpu(buffer, cpu) {
3535 cpu_buffer = buffer->buffers[cpu];
3536 entries += rb_num_of_entries(cpu_buffer);
3537 }
3538
3539 return entries;
3540}
3541EXPORT_SYMBOL_GPL(ring_buffer_entries);
3542
3543
3544
3545
3546
3547
3548
3549
3550unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
3551{
3552 struct ring_buffer_per_cpu *cpu_buffer;
3553 unsigned long overruns = 0;
3554 int cpu;
3555
3556
3557 for_each_buffer_cpu(buffer, cpu) {
3558 cpu_buffer = buffer->buffers[cpu];
3559 overruns += local_read(&cpu_buffer->overrun);
3560 }
3561
3562 return overruns;
3563}
3564EXPORT_SYMBOL_GPL(ring_buffer_overruns);
3565
3566static void rb_iter_reset(struct ring_buffer_iter *iter)
3567{
3568 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3569
3570
3571 iter->head_page = cpu_buffer->reader_page;
3572 iter->head = cpu_buffer->reader_page->read;
3573
3574 iter->cache_reader_page = iter->head_page;
3575 iter->cache_read = cpu_buffer->read;
3576
3577 if (iter->head)
3578 iter->read_stamp = cpu_buffer->read_stamp;
3579 else
3580 iter->read_stamp = iter->head_page->page->time_stamp;
3581}
3582
3583
3584
3585
3586
3587
3588
3589
3590void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3591{
3592 struct ring_buffer_per_cpu *cpu_buffer;
3593 unsigned long flags;
3594
3595 if (!iter)
3596 return;
3597
3598 cpu_buffer = iter->cpu_buffer;
3599
3600 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3601 rb_iter_reset(iter);
3602 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3603}
3604EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3605
3606
3607
3608
3609
3610int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3611{
3612 struct ring_buffer_per_cpu *cpu_buffer;
3613 struct buffer_page *reader;
3614 struct buffer_page *head_page;
3615 struct buffer_page *commit_page;
3616 unsigned commit;
3617
3618 cpu_buffer = iter->cpu_buffer;
3619
3620
3621 reader = cpu_buffer->reader_page;
3622 head_page = cpu_buffer->head_page;
3623 commit_page = cpu_buffer->commit_page;
3624 commit = rb_page_commit(commit_page);
3625
3626 return ((iter->head_page == commit_page && iter->head == commit) ||
3627 (iter->head_page == reader && commit_page == head_page &&
3628 head_page->read == commit &&
3629 iter->head == rb_page_commit(cpu_buffer->reader_page)));
3630}
3631EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3632
3633static void
3634rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3635 struct ring_buffer_event *event)
3636{
3637 u64 delta;
3638
3639 switch (event->type_len) {
3640 case RINGBUF_TYPE_PADDING:
3641 return;
3642
3643 case RINGBUF_TYPE_TIME_EXTEND:
3644 delta = ring_buffer_event_time_stamp(event);
3645 cpu_buffer->read_stamp += delta;
3646 return;
3647
3648 case RINGBUF_TYPE_TIME_STAMP:
3649 delta = ring_buffer_event_time_stamp(event);
3650 cpu_buffer->read_stamp = delta;
3651 return;
3652
3653 case RINGBUF_TYPE_DATA:
3654 cpu_buffer->read_stamp += event->time_delta;
3655 return;
3656
3657 default:
3658 BUG();
3659 }
3660 return;
3661}
3662
3663static void
3664rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3665 struct ring_buffer_event *event)
3666{
3667 u64 delta;
3668
3669 switch (event->type_len) {
3670 case RINGBUF_TYPE_PADDING:
3671 return;
3672
3673 case RINGBUF_TYPE_TIME_EXTEND:
3674 delta = ring_buffer_event_time_stamp(event);
3675 iter->read_stamp += delta;
3676 return;
3677
3678 case RINGBUF_TYPE_TIME_STAMP:
3679 delta = ring_buffer_event_time_stamp(event);
3680 iter->read_stamp = delta;
3681 return;
3682
3683 case RINGBUF_TYPE_DATA:
3684 iter->read_stamp += event->time_delta;
3685 return;
3686
3687 default:
3688 BUG();
3689 }
3690 return;
3691}
3692
3693static struct buffer_page *
3694rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3695{
3696 struct buffer_page *reader = NULL;
3697 unsigned long overwrite;
3698 unsigned long flags;
3699 int nr_loops = 0;
3700 int ret;
3701
3702 local_irq_save(flags);
3703 arch_spin_lock(&cpu_buffer->lock);
3704
3705 again:
3706
3707
3708
3709
3710
3711
3712 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3713 reader = NULL;
3714 goto out;
3715 }
3716
3717 reader = cpu_buffer->reader_page;
3718
3719
3720 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3721 goto out;
3722
3723
3724 if (RB_WARN_ON(cpu_buffer,
3725 cpu_buffer->reader_page->read > rb_page_size(reader)))
3726 goto out;
3727
3728
3729 reader = NULL;
3730 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3731 goto out;
3732
3733
3734 if (rb_num_of_entries(cpu_buffer) == 0)
3735 goto out;
3736
3737
3738
3739
3740 local_set(&cpu_buffer->reader_page->write, 0);
3741 local_set(&cpu_buffer->reader_page->entries, 0);
3742 local_set(&cpu_buffer->reader_page->page->commit, 0);
3743 cpu_buffer->reader_page->real_end = 0;
3744
3745 spin:
3746
3747
3748
3749 reader = rb_set_head_page(cpu_buffer);
3750 if (!reader)
3751 goto out;
3752 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3753 cpu_buffer->reader_page->list.prev = reader->list.prev;
3754
3755
3756
3757
3758
3759
3760 cpu_buffer->pages = reader->list.prev;
3761
3762
3763 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774 smp_mb();
3775 overwrite = local_read(&(cpu_buffer->overrun));
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3789
3790
3791
3792
3793 if (!ret)
3794 goto spin;
3795
3796
3797
3798
3799
3800
3801 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3802 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3803
3804 local_inc(&cpu_buffer->pages_read);
3805
3806
3807 cpu_buffer->reader_page = reader;
3808 cpu_buffer->reader_page->read = 0;
3809
3810 if (overwrite != cpu_buffer->last_overrun) {
3811 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3812 cpu_buffer->last_overrun = overwrite;
3813 }
3814
3815 goto again;
3816
3817 out:
3818
3819 if (reader && reader->read == 0)
3820 cpu_buffer->read_stamp = reader->page->time_stamp;
3821
3822 arch_spin_unlock(&cpu_buffer->lock);
3823 local_irq_restore(flags);
3824
3825 return reader;
3826}
3827
3828static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3829{
3830 struct ring_buffer_event *event;
3831 struct buffer_page *reader;
3832 unsigned length;
3833
3834 reader = rb_get_reader_page(cpu_buffer);
3835
3836
3837 if (RB_WARN_ON(cpu_buffer, !reader))
3838 return;
3839
3840 event = rb_reader_event(cpu_buffer);
3841
3842 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3843 cpu_buffer->read++;
3844
3845 rb_update_read_stamp(cpu_buffer, event);
3846
3847 length = rb_event_length(event);
3848 cpu_buffer->reader_page->read += length;
3849}
3850
3851static void rb_advance_iter(struct ring_buffer_iter *iter)
3852{
3853 struct ring_buffer_per_cpu *cpu_buffer;
3854 struct ring_buffer_event *event;
3855 unsigned length;
3856
3857 cpu_buffer = iter->cpu_buffer;
3858
3859
3860
3861
3862 if (iter->head >= rb_page_size(iter->head_page)) {
3863
3864 if (iter->head_page == cpu_buffer->commit_page)
3865 return;
3866 rb_inc_iter(iter);
3867 return;
3868 }
3869
3870 event = rb_iter_head_event(iter);
3871
3872 length = rb_event_length(event);
3873
3874
3875
3876
3877
3878 if (RB_WARN_ON(cpu_buffer,
3879 (iter->head_page == cpu_buffer->commit_page) &&
3880 (iter->head + length > rb_commit_index(cpu_buffer))))
3881 return;
3882
3883 rb_update_iter_read_stamp(iter, event);
3884
3885 iter->head += length;
3886
3887
3888 if ((iter->head >= rb_page_size(iter->head_page)) &&
3889 (iter->head_page != cpu_buffer->commit_page))
3890 rb_inc_iter(iter);
3891}
3892
3893static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3894{
3895 return cpu_buffer->lost_events;
3896}
3897
3898static struct ring_buffer_event *
3899rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3900 unsigned long *lost_events)
3901{
3902 struct ring_buffer_event *event;
3903 struct buffer_page *reader;
3904 int nr_loops = 0;
3905
3906 if (ts)
3907 *ts = 0;
3908 again:
3909
3910
3911
3912
3913
3914
3915 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3916 return NULL;
3917
3918 reader = rb_get_reader_page(cpu_buffer);
3919 if (!reader)
3920 return NULL;
3921
3922 event = rb_reader_event(cpu_buffer);
3923
3924 switch (event->type_len) {
3925 case RINGBUF_TYPE_PADDING:
3926 if (rb_null_event(event))
3927 RB_WARN_ON(cpu_buffer, 1);
3928
3929
3930
3931
3932
3933
3934
3935
3936 return event;
3937
3938 case RINGBUF_TYPE_TIME_EXTEND:
3939
3940 rb_advance_reader(cpu_buffer);
3941 goto again;
3942
3943 case RINGBUF_TYPE_TIME_STAMP:
3944 if (ts) {
3945 *ts = ring_buffer_event_time_stamp(event);
3946 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3947 cpu_buffer->cpu, ts);
3948 }
3949
3950 rb_advance_reader(cpu_buffer);
3951 goto again;
3952
3953 case RINGBUF_TYPE_DATA:
3954 if (ts && !(*ts)) {
3955 *ts = cpu_buffer->read_stamp + event->time_delta;
3956 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3957 cpu_buffer->cpu, ts);
3958 }
3959 if (lost_events)
3960 *lost_events = rb_lost_events(cpu_buffer);
3961 return event;
3962
3963 default:
3964 BUG();
3965 }
3966
3967 return NULL;
3968}
3969EXPORT_SYMBOL_GPL(ring_buffer_peek);
3970
3971static struct ring_buffer_event *
3972rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3973{
3974 struct ring_buffer *buffer;
3975 struct ring_buffer_per_cpu *cpu_buffer;
3976 struct ring_buffer_event *event;
3977 int nr_loops = 0;
3978
3979 if (ts)
3980 *ts = 0;
3981
3982 cpu_buffer = iter->cpu_buffer;
3983 buffer = cpu_buffer->buffer;
3984
3985
3986
3987
3988
3989
3990 if (unlikely(iter->cache_read != cpu_buffer->read ||
3991 iter->cache_reader_page != cpu_buffer->reader_page))
3992 rb_iter_reset(iter);
3993
3994 again:
3995 if (ring_buffer_iter_empty(iter))
3996 return NULL;
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3))
4007 return NULL;
4008
4009 if (rb_per_cpu_empty(cpu_buffer))
4010 return NULL;
4011
4012 if (iter->head >= rb_page_size(iter->head_page)) {
4013 rb_inc_iter(iter);
4014 goto again;
4015 }
4016
4017 event = rb_iter_head_event(iter);
4018
4019 switch (event->type_len) {
4020 case RINGBUF_TYPE_PADDING:
4021 if (rb_null_event(event)) {
4022 rb_inc_iter(iter);
4023 goto again;
4024 }
4025 rb_advance_iter(iter);
4026 return event;
4027
4028 case RINGBUF_TYPE_TIME_EXTEND:
4029
4030 rb_advance_iter(iter);
4031 goto again;
4032
4033 case RINGBUF_TYPE_TIME_STAMP:
4034 if (ts) {
4035 *ts = ring_buffer_event_time_stamp(event);
4036 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4037 cpu_buffer->cpu, ts);
4038 }
4039
4040 rb_advance_iter(iter);
4041 goto again;
4042
4043 case RINGBUF_TYPE_DATA:
4044 if (ts && !(*ts)) {
4045 *ts = iter->read_stamp + event->time_delta;
4046 ring_buffer_normalize_time_stamp(buffer,
4047 cpu_buffer->cpu, ts);
4048 }
4049 return event;
4050
4051 default:
4052 BUG();
4053 }
4054
4055 return NULL;
4056}
4057EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
4058
4059static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
4060{
4061 if (likely(!in_nmi())) {
4062 raw_spin_lock(&cpu_buffer->reader_lock);
4063 return true;
4064 }
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075 if (raw_spin_trylock(&cpu_buffer->reader_lock))
4076 return true;
4077
4078
4079 atomic_inc(&cpu_buffer->record_disabled);
4080 return false;
4081}
4082
4083static inline void
4084rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
4085{
4086 if (likely(locked))
4087 raw_spin_unlock(&cpu_buffer->reader_lock);
4088 return;
4089}
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
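/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */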
4101struct ring_buffer_event *
4102ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
4103 unsigned long *lost_events)
4104{
4105 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4106 struct ring_buffer_event *event;
4107 unsigned long flags;
4108 bool dolock;
4109
4110 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4111 return NULL;
4112
4113 again:
4114 local_irq_save(flags);
4115 dolock = rb_reader_lock(cpu_buffer);
4116 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4117 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4118 rb_advance_reader(cpu_buffer);
4119 rb_reader_unlock(cpu_buffer, dolock);
4120 local_irq_restore(flags);
4121
4122 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4123 goto again;
4124
4125 return event;
4126}
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136struct ring_buffer_event *
4137ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4138{
4139 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4140 struct ring_buffer_event *event;
4141 unsigned long flags;
4142
4143 again:
4144 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4145 event = rb_iter_peek(iter, ts);
4146 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4147
4148 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4149 goto again;
4150
4151 return event;
4152}
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
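/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */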
4165struct ring_buffer_event *
4166ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
4167 unsigned long *lost_events)
4168{
4169 struct ring_buffer_per_cpu *cpu_buffer;
4170 struct ring_buffer_event *event = NULL;
4171 unsigned long flags;
4172 bool dolock;
4173
4174 again:
4175
4176 preempt_disable();
4177
4178 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4179 goto out;
4180
4181 cpu_buffer = buffer->buffers[cpu];
4182 local_irq_save(flags);
4183 dolock = rb_reader_lock(cpu_buffer);
4184
4185 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4186 if (event) {
4187 cpu_buffer->lost_events = 0;
4188 rb_advance_reader(cpu_buffer);
4189 }
4190
4191 rb_reader_unlock(cpu_buffer, dolock);
4192 local_irq_restore(flags);
4193
4194 out:
4195 preempt_enable();
4196
4197 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4198 goto again;
4199
4200 return event;
4201}
4202EXPORT_SYMBOL_GPL(ring_buffer_consume);
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
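/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 *
 * This performs the initial preparations necessary to iterate
 * through the buffer. Memory is allocated, buffer recording
 * is disabled, and the iterator pointer is returned to the caller.
 *
 * After a sequence of ring_buffer_read_prepare calls, the caller is
 * expected to make at least one call to ring_buffer_read_prepare_sync.
 * Afterwards, ring_buffer_read_start is invoked to get things going
 * for real.
 *
 * This overall must be paired with ring_buffer_read_finish.
 */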
4224struct ring_buffer_iter *
4225ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
4226{
4227 struct ring_buffer_per_cpu *cpu_buffer;
4228 struct ring_buffer_iter *iter;
4229
4230 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4231 return NULL;
4232
4233 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
4234 if (!iter)
4235 return NULL;
4236
4237 cpu_buffer = buffer->buffers[cpu];
4238
4239 iter->cpu_buffer = cpu_buffer;
4240
4241 atomic_inc(&buffer->resize_disabled);
4242 atomic_inc(&cpu_buffer->record_disabled);
4243
4244 return iter;
4245}
4246EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
4247
4248
4249
4250
4251
4252
4253
4254
4255void
4256ring_buffer_read_prepare_sync(void)
4257{
4258 synchronize_rcu();
4259}
4260EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273void
4274ring_buffer_read_start(struct ring_buffer_iter *iter)
4275{
4276 struct ring_buffer_per_cpu *cpu_buffer;
4277 unsigned long flags;
4278
4279 if (!iter)
4280 return;
4281
4282 cpu_buffer = iter->cpu_buffer;
4283
4284 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4285 arch_spin_lock(&cpu_buffer->lock);
4286 rb_iter_reset(iter);
4287 arch_spin_unlock(&cpu_buffer->lock);
4288 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4289}
4290EXPORT_SYMBOL_GPL(ring_buffer_read_start);
4291
4292
4293
4294
4295
4296
4297
4298
4299void
4300ring_buffer_read_finish(struct ring_buffer_iter *iter)
4301{
4302 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4303 unsigned long flags;
4304
4305
4306
4307
4308
4309
4310
4311 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4312 rb_check_pages(cpu_buffer);
4313 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4314
4315 atomic_dec(&cpu_buffer->record_disabled);
4316 atomic_dec(&cpu_buffer->buffer->resize_disabled);
4317 kfree(iter);
4318}
4319EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
4320
4321
4322
4323
4324
4325
4326
4327
4328struct ring_buffer_event *
4329ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
4330{
4331 struct ring_buffer_event *event;
4332 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4333 unsigned long flags;
4334
4335 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4336 again:
4337 event = rb_iter_peek(iter, ts);
4338 if (!event)
4339 goto out;
4340
4341 if (event->type_len == RINGBUF_TYPE_PADDING)
4342 goto again;
4343
4344 rb_advance_iter(iter);
4345 out:
4346 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4347
4348 return event;
4349}
4350EXPORT_SYMBOL_GPL(ring_buffer_read);
4351
4352
4353
4354
4355
4356unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
4357{
4358
4359
4360
4361
4362
4363
4364 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4365 return 0;
4366
4367 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
4368}
4369EXPORT_SYMBOL_GPL(ring_buffer_size);
4370
4371static void
4372rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
4373{
4374 rb_head_page_deactivate(cpu_buffer);
4375
4376 cpu_buffer->head_page
4377 = list_entry(cpu_buffer->pages, struct buffer_page, list);
4378 local_set(&cpu_buffer->head_page->write, 0);
4379 local_set(&cpu_buffer->head_page->entries, 0);
4380 local_set(&cpu_buffer->head_page->page->commit, 0);
4381
4382 cpu_buffer->head_page->read = 0;
4383
4384 cpu_buffer->tail_page = cpu_buffer->head_page;
4385 cpu_buffer->commit_page = cpu_buffer->head_page;
4386
4387 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
4388 INIT_LIST_HEAD(&cpu_buffer->new_pages);
4389 local_set(&cpu_buffer->reader_page->write, 0);
4390 local_set(&cpu_buffer->reader_page->entries, 0);
4391 local_set(&cpu_buffer->reader_page->page->commit, 0);
4392 cpu_buffer->reader_page->read = 0;
4393
4394 local_set(&cpu_buffer->entries_bytes, 0);
4395 local_set(&cpu_buffer->overrun, 0);
4396 local_set(&cpu_buffer->commit_overrun, 0);
4397 local_set(&cpu_buffer->dropped_events, 0);
4398 local_set(&cpu_buffer->entries, 0);
4399 local_set(&cpu_buffer->committing, 0);
4400 local_set(&cpu_buffer->commits, 0);
4401 local_set(&cpu_buffer->pages_touched, 0);
4402 local_set(&cpu_buffer->pages_read, 0);
4403 cpu_buffer->last_pages_touch = 0;
4404 cpu_buffer->shortest_full = 0;
4405 cpu_buffer->read = 0;
4406 cpu_buffer->read_bytes = 0;
4407
4408 cpu_buffer->write_stamp = 0;
4409 cpu_buffer->read_stamp = 0;
4410
4411 cpu_buffer->lost_events = 0;
4412 cpu_buffer->last_overrun = 0;
4413
4414 rb_head_page_activate(cpu_buffer);
4415}
4416
4417
4418
4419
4420
4421
4422void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
4423{
4424 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4425 unsigned long flags;
4426
4427 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4428 return;
4429
4430 atomic_inc(&buffer->resize_disabled);
4431 atomic_inc(&cpu_buffer->record_disabled);
4432
4433
4434 synchronize_rcu();
4435
4436 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4437
4438 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
4439 goto out;
4440
4441 arch_spin_lock(&cpu_buffer->lock);
4442
4443 rb_reset_cpu(cpu_buffer);
4444
4445 arch_spin_unlock(&cpu_buffer->lock);
4446
4447 out:
4448 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4449
4450 atomic_dec(&cpu_buffer->record_disabled);
4451 atomic_dec(&buffer->resize_disabled);
4452}
4453EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
4454
4455
4456
4457
4458
4459void ring_buffer_reset(struct ring_buffer *buffer)
4460{
4461 int cpu;
4462
4463 for_each_buffer_cpu(buffer, cpu)
4464 ring_buffer_reset_cpu(buffer, cpu);
4465}
4466EXPORT_SYMBOL_GPL(ring_buffer_reset);
4467
4468
4469
4470
4471
4472bool ring_buffer_empty(struct ring_buffer *buffer)
4473{
4474 struct ring_buffer_per_cpu *cpu_buffer;
4475 unsigned long flags;
4476 bool dolock;
4477 int cpu;
4478 int ret;
4479
4480
4481 for_each_buffer_cpu(buffer, cpu) {
4482 cpu_buffer = buffer->buffers[cpu];
4483 local_irq_save(flags);
4484 dolock = rb_reader_lock(cpu_buffer);
4485 ret = rb_per_cpu_empty(cpu_buffer);
4486 rb_reader_unlock(cpu_buffer, dolock);
4487 local_irq_restore(flags);
4488
4489 if (!ret)
4490 return false;
4491 }
4492
4493 return true;
4494}
4495EXPORT_SYMBOL_GPL(ring_buffer_empty);
4496
4497
4498
4499
4500
4501
4502bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
4503{
4504 struct ring_buffer_per_cpu *cpu_buffer;
4505 unsigned long flags;
4506 bool dolock;
4507 int ret;
4508
4509 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4510 return true;
4511
4512 cpu_buffer = buffer->buffers[cpu];
4513 local_irq_save(flags);
4514 dolock = rb_reader_lock(cpu_buffer);
4515 ret = rb_per_cpu_empty(cpu_buffer);
4516 rb_reader_unlock(cpu_buffer, dolock);
4517 local_irq_restore(flags);
4518
4519 return ret;
4520}
4521EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
4522
4523#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
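/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and have another back up buffer lying around.
 * It is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */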
4534int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4535 struct ring_buffer *buffer_b, int cpu)
4536{
4537 struct ring_buffer_per_cpu *cpu_buffer_a;
4538 struct ring_buffer_per_cpu *cpu_buffer_b;
4539 int ret = -EINVAL;
4540
4541 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4542 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4543 goto out;
4544
4545 cpu_buffer_a = buffer_a->buffers[cpu];
4546 cpu_buffer_b = buffer_b->buffers[cpu];
4547
4548
4549 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4550 goto out;
4551
4552 ret = -EAGAIN;
4553
4554 if (atomic_read(&buffer_a->record_disabled))
4555 goto out;
4556
4557 if (atomic_read(&buffer_b->record_disabled))
4558 goto out;
4559
4560 if (atomic_read(&cpu_buffer_a->record_disabled))
4561 goto out;
4562
4563 if (atomic_read(&cpu_buffer_b->record_disabled))
4564 goto out;
4565
4566
4567
4568
4569
4570
4571
4572 atomic_inc(&cpu_buffer_a->record_disabled);
4573 atomic_inc(&cpu_buffer_b->record_disabled);
4574
4575 ret = -EBUSY;
4576 if (local_read(&cpu_buffer_a->committing))
4577 goto out_dec;
4578 if (local_read(&cpu_buffer_b->committing))
4579 goto out_dec;
4580
4581 buffer_a->buffers[cpu] = cpu_buffer_b;
4582 buffer_b->buffers[cpu] = cpu_buffer_a;
4583
4584 cpu_buffer_b->buffer = buffer_a;
4585 cpu_buffer_a->buffer = buffer_b;
4586
4587 ret = 0;
4588
4589out_dec:
4590 atomic_dec(&cpu_buffer_a->record_disabled);
4591 atomic_dec(&cpu_buffer_b->record_disabled);
4592out:
4593 return ret;
4594}
4595EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
4596#endif
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
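/**
 * ring_buffer_alloc_read_page - allocate a page to read from buffer
 * @buffer: the buffer to allocate for.
 * @cpu: the cpu buffer to allocate.
 *
 * This function is used in conjunction with ring_buffer_read_page.
 * When reading a full page from the ring buffer, these functions
 * can be used to speed up the process. The calling function should
 * allocate a few pages first with this function. Then when it
 * needs to get pages from the ring buffer, it passes the result
 * of this function into ring_buffer_read_page, which will swap
 * the page that was allocated, with the read page of the buffer.
 *
 * Returns:
 *  The page allocated, or ERR_PTR on error.
 */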
4614void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
4615{
4616 struct ring_buffer_per_cpu *cpu_buffer;
4617 struct buffer_data_page *bpage = NULL;
4618 unsigned long flags;
4619 struct page *page;
4620
4621 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4622 return ERR_PTR(-ENODEV);
4623
4624 cpu_buffer = buffer->buffers[cpu];
4625 local_irq_save(flags);
4626 arch_spin_lock(&cpu_buffer->lock);
4627
4628 if (cpu_buffer->free_page) {
4629 bpage = cpu_buffer->free_page;
4630 cpu_buffer->free_page = NULL;
4631 }
4632
4633 arch_spin_unlock(&cpu_buffer->lock);
4634 local_irq_restore(flags);
4635
4636 if (bpage)
4637 goto out;
4638
4639 page = alloc_pages_node(cpu_to_node(cpu),
4640 GFP_KERNEL | __GFP_NORETRY, 0);
4641 if (!page)
4642 return ERR_PTR(-ENOMEM);
4643
4644 bpage = page_address(page);
4645
4646 out:
4647 rb_init_page(bpage);
4648
4649 return bpage;
4650}
4651EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4662{
4663 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4664 struct buffer_data_page *bpage = data;
4665 struct page *page = virt_to_page(bpage);
4666 unsigned long flags;
4667
4668
4669 if (page_ref_count(page) > 1)
4670 goto out;
4671
4672 local_irq_save(flags);
4673 arch_spin_lock(&cpu_buffer->lock);
4674
4675 if (!cpu_buffer->free_page) {
4676 cpu_buffer->free_page = bpage;
4677 bpage = NULL;
4678 }
4679
4680 arch_spin_unlock(&cpu_buffer->lock);
4681 local_irq_restore(flags);
4682
4683 out:
4684 free_page((unsigned long)bpage);
4685}
4686EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
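/**
 * ring_buffer_read_page - extract a page from the ring buffer
 * @buffer: buffer to extract from
 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 * @len: amount to extract
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full.
 *
 * This function will pull out a page from the ring buffer and consume it.
 * @data_page must be the address of the variable that was returned
 * from ring_buffer_alloc_read_page. This is because the page might be used
 * to swap with a page in the ring buffer.
 *
 * Illustrative sketch (process_page() stands in for whatever the caller
 * does with the data and is not defined here):
 *
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (IS_ERR(rpage))
 *		return PTR_ERR(rpage);
 *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(rpage, ret);
 *	ring_buffer_free_read_page(buffer, cpu, rpage);
 *
 * When @full is set, the read only succeeds if the writer has moved off
 * the reader page, so that the whole page can be swapped out.
 *
 * Return:
 *  >=0 if data has been transferred, returns the offset of consumed data.
 *  <0 if no data has been transferred.
 */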
4721int ring_buffer_read_page(struct ring_buffer *buffer,
4722 void **data_page, size_t len, int cpu, int full)
4723{
4724 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4725 struct ring_buffer_event *event;
4726 struct buffer_data_page *bpage;
4727 struct buffer_page *reader;
4728 unsigned long missed_events;
4729 unsigned long flags;
4730 unsigned int commit;
4731 unsigned int read;
4732 u64 save_timestamp;
4733 int ret = -1;
4734
4735 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4736 goto out;
4737
4738
4739
4740
4741
4742 if (len <= BUF_PAGE_HDR_SIZE)
4743 goto out;
4744
4745 len -= BUF_PAGE_HDR_SIZE;
4746
4747 if (!data_page)
4748 goto out;
4749
4750 bpage = *data_page;
4751 if (!bpage)
4752 goto out;
4753
4754 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4755
4756 reader = rb_get_reader_page(cpu_buffer);
4757 if (!reader)
4758 goto out_unlock;
4759
4760 event = rb_reader_event(cpu_buffer);
4761
4762 read = reader->read;
4763 commit = rb_page_commit(reader);
4764
4765
4766 missed_events = cpu_buffer->lost_events;
4767
4768
4769
4770
4771
4772
4773
4774
4775 if (read || (len < (commit - read)) ||
4776 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4777 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4778 unsigned int rpos = read;
4779 unsigned int pos = 0;
4780 unsigned int size;
4781
4782 if (full)
4783 goto out_unlock;
4784
4785 if (len > (commit - read))
4786 len = (commit - read);
4787
4788
4789 size = rb_event_ts_length(event);
4790
4791 if (len < size)
4792 goto out_unlock;
4793
4794
4795 save_timestamp = cpu_buffer->read_stamp;
4796
4797
4798 do {
4799
4800
4801
4802
4803
4804
4805 size = rb_event_length(event);
4806 memcpy(bpage->data + pos, rpage->data + rpos, size);
4807
4808 len -= size;
4809
4810 rb_advance_reader(cpu_buffer);
4811 rpos = reader->read;
4812 pos += size;
4813
4814 if (rpos >= commit)
4815 break;
4816
4817 event = rb_reader_event(cpu_buffer);
4818
4819 size = rb_event_ts_length(event);
4820 } while (len >= size);
4821
4822
4823 local_set(&bpage->commit, pos);
4824 bpage->time_stamp = save_timestamp;
4825
4826
4827 read = 0;
4828 } else {
4829
4830 cpu_buffer->read += rb_page_entries(reader);
4831 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4832
4833
4834 rb_init_page(bpage);
4835 bpage = reader->page;
4836 reader->page = *data_page;
4837 local_set(&reader->write, 0);
4838 local_set(&reader->entries, 0);
4839 reader->read = 0;
4840 *data_page = bpage;
4841
4842
4843
4844
4845
4846
4847 if (reader->real_end)
4848 local_set(&bpage->commit, reader->real_end);
4849 }
4850 ret = read;
4851
4852 cpu_buffer->lost_events = 0;
4853
4854 commit = local_read(&bpage->commit);
4855
4856
4857
4858 if (missed_events) {
4859
4860
4861
4862 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4863 memcpy(&bpage->data[commit], &missed_events,
4864 sizeof(missed_events));
4865 local_add(RB_MISSED_STORED, &bpage->commit);
4866 commit += sizeof(missed_events);
4867 }
4868 local_add(RB_MISSED_EVENTS, &bpage->commit);
4869 }
4870
4871
4872
4873
4874 if (commit < BUF_PAGE_SIZE)
4875 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4876
4877 out_unlock:
4878 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4879
4880 out:
4881 return ret;
4882}
4883EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4884
4885
4886
4887
4888
4889
4890int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
4891{
4892 struct ring_buffer *buffer;
4893 long nr_pages_same;
4894 int cpu_i;
4895 unsigned long nr_pages;
4896
4897 buffer = container_of(node, struct ring_buffer, node);
4898 if (cpumask_test_cpu(cpu, buffer->cpumask))
4899 return 0;
4900
4901 nr_pages = 0;
4902 nr_pages_same = 1;
4903
4904 for_each_buffer_cpu(buffer, cpu_i) {
4905
4906 if (nr_pages == 0)
4907 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4908 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4909 nr_pages_same = 0;
4910 break;
4911 }
4912 }
4913
4914 if (!nr_pages_same)
4915 nr_pages = 2;
4916 buffer->buffers[cpu] =
4917 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4918 if (!buffer->buffers[cpu]) {
4919 WARN(1, "failed to allocate ring buffer on CPU %u\n",
4920 cpu);
4921 return -ENOMEM;
4922 }
4923 smp_wmb();
4924 cpumask_set_cpu(cpu, buffer->cpumask);
4925 return 0;
4926}
4927
4928#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944static struct task_struct *rb_threads[NR_CPUS] __initdata;
4945
4946struct rb_test_data {
4947 struct ring_buffer *buffer;
4948 unsigned long events;
4949 unsigned long bytes_written;
4950 unsigned long bytes_alloc;
4951 unsigned long bytes_dropped;
4952 unsigned long events_nested;
4953 unsigned long bytes_written_nested;
4954 unsigned long bytes_alloc_nested;
4955 unsigned long bytes_dropped_nested;
4956 int min_size_nested;
4957 int max_size_nested;
4958 int max_size;
4959 int min_size;
4960 int cpu;
4961 int cnt;
4962};
4963
4964static struct rb_test_data rb_data[NR_CPUS] __initdata;
4965
4966
4967#define RB_TEST_BUFFER_SIZE 1048576
4968
4969static char rb_string[] __initdata =
4970 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
4971 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
4972 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
4973
4974static bool rb_test_started __initdata;
4975
4976struct rb_item {
4977 int size;
4978 char str[];
4979};
4980
4981static __init int rb_write_something(struct rb_test_data *data, bool nested)
4982{
4983 struct ring_buffer_event *event;
4984 struct rb_item *item;
4985 bool started;
4986 int event_len;
4987 int size;
4988 int len;
4989 int cnt;
4990
	/* Have nested writes differ from normal writes */
	cnt = data->cnt + (nested ? 27 : 0);

	/* Multiply cnt by ~e, to make some unique increment */
	size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
4996
4997 len = size + sizeof(struct rb_item);
4998
4999 started = rb_test_started;
5000
5001 smp_rmb();
5002
5003 event = ring_buffer_lock_reserve(data->buffer, len);
	if (!event) {
		/* Ignore dropped events before the test has started. */
		if (started) {
			if (nested)
				data->bytes_dropped_nested += len;
			else
				data->bytes_dropped += len;
5011 }
5012 return len;
5013 }
5014
5015 event_len = ring_buffer_event_length(event);
5016
5017 if (RB_WARN_ON(data->buffer, event_len < len))
5018 goto out;
5019
5020 item = ring_buffer_event_data(event);
5021 item->size = size;
5022 memcpy(item->str, rb_string, size);
5023
5024 if (nested) {
5025 data->bytes_alloc_nested += event_len;
5026 data->bytes_written_nested += len;
5027 data->events_nested++;
5028 if (!data->min_size_nested || len < data->min_size_nested)
5029 data->min_size_nested = len;
5030 if (len > data->max_size_nested)
5031 data->max_size_nested = len;
5032 } else {
5033 data->bytes_alloc += event_len;
5034 data->bytes_written += len;
5035 data->events++;
5036 if (!data->min_size || len < data->min_size)
			data->min_size = len;
5038 if (len > data->max_size)
5039 data->max_size = len;
5040 }
5041
5042 out:
5043 ring_buffer_unlock_commit(data->buffer, event);
5044
5045 return 0;
5046}
5047
5048static __init int rb_test(void *arg)
5049{
5050 struct rb_test_data *data = arg;
5051
5052 while (!kthread_should_stop()) {
5053 rb_write_something(data, false);
5054 data->cnt++;
5055
5056 set_current_state(TASK_INTERRUPTIBLE);
5057
5058 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
5059 }
5060
5061 return 0;
5062}
5063
5064static __init void rb_ipi(void *ignore)
5065{
5066 struct rb_test_data *data;
5067 int cpu = smp_processor_id();
5068
5069 data = &rb_data[cpu];
5070 rb_write_something(data, true);
5071}
5072
5073static __init int rb_hammer_test(void *arg)
5074{
5075 while (!kthread_should_stop()) {
5076
5077
5078 smp_call_function(rb_ipi, NULL, 1);
5079
5080 schedule();
5081 }
5082
5083 return 0;
5084}
5085
5086static __init int test_ringbuffer(void)
5087{
5088 struct task_struct *rb_hammer;
5089 struct ring_buffer *buffer;
5090 int cpu;
5091 int ret = 0;
5092
5093 pr_info("Running ring buffer tests...\n");
5094
5095 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
5096 if (WARN_ON(!buffer))
5097 return 0;
5098
5099
5100 ring_buffer_record_off(buffer);
5101
5102 for_each_online_cpu(cpu) {
5103 rb_data[cpu].buffer = buffer;
5104 rb_data[cpu].cpu = cpu;
5105 rb_data[cpu].cnt = cpu;
5106 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
5107 "rbtester/%d", cpu);
5108 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
5109 pr_cont("FAILED\n");
5110 ret = PTR_ERR(rb_threads[cpu]);
5111 goto out_free;
5112 }
5113
5114 kthread_bind(rb_threads[cpu], cpu);
5115 wake_up_process(rb_threads[cpu]);
5116 }
5117
5118
5119 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
5120 if (WARN_ON(IS_ERR(rb_hammer))) {
5121 pr_cont("FAILED\n");
5122 ret = PTR_ERR(rb_hammer);
5123 goto out_free;
5124 }
5125
5126 ring_buffer_record_on(buffer);
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136 smp_wmb();
5137 rb_test_started = true;
5138
5139 set_current_state(TASK_INTERRUPTIBLE);
	/* Just run for 10 seconds */
5141 schedule_timeout(10 * HZ);
5142
5143 kthread_stop(rb_hammer);
5144
5145 out_free:
5146 for_each_online_cpu(cpu) {
5147 if (!rb_threads[cpu])
5148 break;
5149 kthread_stop(rb_threads[cpu]);
5150 }
5151 if (ret) {
5152 ring_buffer_free(buffer);
5153 return ret;
5154 }
5155
5156
5157 pr_info("finished\n");
5158 for_each_online_cpu(cpu) {
5159 struct ring_buffer_event *event;
5160 struct rb_test_data *data = &rb_data[cpu];
5161 struct rb_item *item;
5162 unsigned long total_events;
5163 unsigned long total_dropped;
5164 unsigned long total_written;
5165 unsigned long total_alloc;
5166 unsigned long total_read = 0;
5167 unsigned long total_size = 0;
5168 unsigned long total_len = 0;
5169 unsigned long total_lost = 0;
5170 unsigned long lost;
5171 int big_event_size;
5172 int small_event_size;
5173
5174 ret = -1;
5175
5176 total_events = data->events + data->events_nested;
5177 total_written = data->bytes_written + data->bytes_written_nested;
5178 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
5179 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
5180
5181 big_event_size = data->max_size + data->max_size_nested;
5182 small_event_size = data->min_size + data->min_size_nested;
5183
5184 pr_info("CPU %d:\n", cpu);
5185 pr_info(" events: %ld\n", total_events);
5186 pr_info(" dropped bytes: %ld\n", total_dropped);
5187 pr_info(" alloced bytes: %ld\n", total_alloc);
5188 pr_info(" written bytes: %ld\n", total_written);
5189 pr_info(" biggest event: %d\n", big_event_size);
5190 pr_info(" smallest event: %d\n", small_event_size);
5191
5192 if (RB_WARN_ON(buffer, total_dropped))
5193 break;
5194
5195 ret = 0;
5196
5197 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
5198 total_lost += lost;
5199 item = ring_buffer_event_data(event);
5200 total_len += ring_buffer_event_length(event);
5201 total_size += item->size + sizeof(struct rb_item);
5202 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
5203 pr_info("FAILED!\n");
5204 pr_info("buffer had: %.*s\n", item->size, item->str);
5205 pr_info("expected: %.*s\n", item->size, rb_string);
5206 RB_WARN_ON(buffer, 1);
5207 ret = -1;
5208 break;
5209 }
5210 total_read++;
5211 }
5212 if (ret)
5213 break;
5214
5215 ret = -1;
5216
5217 pr_info(" read events: %ld\n", total_read);
5218 pr_info(" lost events: %ld\n", total_lost);
5219 pr_info(" total events: %ld\n", total_lost + total_read);
5220 pr_info(" recorded len bytes: %ld\n", total_len);
5221 pr_info(" recorded size bytes: %ld\n", total_size);
5222 if (total_lost)
5223 pr_info(" With dropped events, record len and size may not match\n"
5224 " alloced and written from above\n");
5225 if (!total_lost) {
5226 if (RB_WARN_ON(buffer, total_len != total_alloc ||
5227 total_size != total_written))
5228 break;
5229 }
5230 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
5231 break;
5232
5233 ret = 0;
5234 }
5235 if (!ret)
5236 pr_info("Ring buffer PASSED!\n");
5237
5238 ring_buffer_free(buffer);
5239 return 0;
5240}
5241
5242late_initcall(test_ringbuffer);
5243#endif
5244