// SPDX-License-Identifier: GPL-2.0
/*
 * Generic ring buffer
 *
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 */
#include <linux/trace_recursion.h>
#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/sched/clock.h>
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/oom.h>

#include <asm/local.h>

static void update_pages_handler(struct work_struct *work);

/*
 * The ring buffer header is special. We must manually up keep it.
 */
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# compressed entry header\n");
	trace_seq_puts(s, "\ttype_len : 5 bits\n");
	trace_seq_puts(s, "\ttime_delta : 27 bits\n");
	trace_seq_puts(s, "\tarray : 32 bits\n");
	trace_seq_putc(s, '\n');
	trace_seq_printf(s, "\tpadding : type == %d\n",
			 RINGBUF_TYPE_PADDING);
	trace_seq_printf(s, "\ttime_extend : type == %d\n",
			 RINGBUF_TYPE_TIME_EXTEND);
	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
			 RINGBUF_TYPE_TIME_STAMP);
	trace_seq_printf(s, "\tdata max type_len == %d\n",
			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return !trace_seq_has_overflowed(s);
}
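
/*
 * The ring buffer is made up of a list of pages. A separate list of pages
 * is allocated for each CPU. A writer may only write to the buffer that is
 * associated with the CPU it is currently executing on. A reader may read
 * from any per cpu buffer.
 *
 * The reader is special. For each per cpu buffer, the reader has its own
 * reader page. When a reader has read the entire reader page, this reader
 * page is swapped with another page in the ring buffer.
 *
 * As long as the writer is off the reader page, the reader can do whatever
 * it wants with that page. The writer will never write to the reader page;
 * it only cares about the pages linked into the ring buffer itself.
 */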
/* Used for individual buffers (after the counter) */
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */

#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RB_ALIGN_DATA		__aligned(RB_ARCH_ALIGNMENT)

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 8,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

#define extended_time(event) \
	(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	/* padding has a NULL time_delta */
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}

/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			/* undefined */
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		WARN_ON_ONCE(1);
	}
	/* not hit */
	return 0;
}

/*
 * Return total length of time extend and data,
 *   or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (extended_time(event)) {
		/* time extends include the data event after it */
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}

/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (extended_time(event))
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

/* inline for ring buffer fast paths */
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
	if (extended_time(event))
		event = skip_time_extend(event);
	WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in type_len, then array[0] has the data */
	if (event->type_len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}

/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define for_each_online_buffer_cpu(buffer, cpu)	\
	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

static u64 rb_event_time_stamp(struct ring_buffer_event *event)
{
	u64 ts;

	ts = event->array[0];
	ts <<= TS_SHIFT;
	ts += event->time_delta;

	return ts;
}

/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED	(1 << 30)

struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* write committed index */
	unsigned char	 data[] RB_ALIGN_DATA;	/* data of buffer page */
};

/*
 * Note, the buffer_page list must be first. The buffer pages
 * are allocated in cache lines, which means that each buffer
 * page will be at the beginning of a cache line, and thus
 * the least significant bits will be zero. We use this to
 * add flags in the list struct pointers, to make the ring buffer
 * lockless.
 */
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};

/*
 * The buffer page counters, write and entries, must be reset
 * atomically when crossing page boundaries. To synchronize this
 * update, two counters are inserted into the number. One is
 * the actual counter for the write position or count on the page.
 *
 * The other is a counter of updaters. Before an update happens
 * the update partition of the counter is incremented. This will
 * allow the updater to update the counter atomically.
 *
 * The counter is 20 bits, and the state data is 12.
 */
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}

/*
 * Free the given buffer_page along with the data page it points to.
 */
static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;

	trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			 "offset:0;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)sizeof(field.time_stamp),
			 (unsigned int)is_signed_type(u64));

	trace_seq_printf(s, "\tfield: local_t commit;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 (unsigned int)sizeof(field.commit),
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: int overwrite;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), commit),
			 1,
			 (unsigned int)is_signed_type(long));

	trace_seq_printf(s, "\tfield: char data;\t"
			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
			 (unsigned int)offsetof(typeof(field), data),
			 (unsigned int)BUF_PAGE_SIZE,
			 (unsigned int)is_signed_type(char));

	return !trace_seq_has_overflowed(s);
}

struct rb_irq_work {
	struct irq_work			work;
	wait_queue_head_t		waiters;
	wait_queue_head_t		full_waiters;
	bool				waiters_pending;
	bool				full_waiters_pending;
	bool				wakeup_full;
};

/*
 * Structure to hold event state and handle nested events.
 */
struct rb_event_info {
	u64			ts;
	u64			delta;
	u64			before;
	u64			after;
	unsigned long		length;
	struct buffer_page	*tail_page;
	int			add_timestamp;
};

/*
 * Used for the add_timestamp
 *  NONE
 *  EXTEND - wants a time extend
 *  ABSOLUTE - the buffer requests all events to have absolute time stamps
 *  FORCE - force a full time stamp.
 */
enum {
	RB_ADD_STAMP_NONE		= 0,
	RB_ADD_STAMP_EXTEND		= BIT(1),
	RB_ADD_STAMP_ABSOLUTE		= BIT(2),
	RB_ADD_STAMP_FORCE		= BIT(3)
};

/*
 * Used for which event context the event is in.
 *  TRANSITION = 0
 *  NMI        = 1
 *  IRQ        = 2
 *  SOFTIRQ    = 3
 *  NORMAL     = 4
 */
enum {
	RB_CTX_TRANSITION,
	RB_CTX_NMI,
	RB_CTX_IRQ,
	RB_CTX_SOFTIRQ,
	RB_CTX_NORMAL,
	RB_CTX_MAX
};

#if BITS_PER_LONG == 32
#define RB_TIME_32
#endif

/*
 * On 32-bit kernels the 64-bit time stamps are stored split up so that
 * they can be read and written without a 64-bit cmpxchg; see the
 * rb_time_*() helpers below.
 */
#ifdef RB_TIME_32

struct rb_time_struct {
	local_t		cnt;
	local_t		top;
	local_t		bottom;
};
#else
#include <asm/local64.h>
struct rb_time_struct {
	local64_t	time;
};
#endif
typedef struct rb_time_struct rb_time_t;

/* Max number of nested writers (one per preemption context) */
#define MAX_NEST	5

/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	struct trace_buffer		*buffer;
	raw_spinlock_t			reader_lock;	/* serialize readers */
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	struct buffer_data_page		*free_page;
	unsigned long			nr_pages;
	unsigned int			current_context;
	struct list_head		*pages;
	struct buffer_page		*head_page;	/* read from head */
	struct buffer_page		*tail_page;	/* write to tail */
	struct buffer_page		*commit_page;	/* committed pages */
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	unsigned long			nest;
	local_t				entries_bytes;
	local_t				entries;
	local_t				overrun;
	local_t				commit_overrun;
	local_t				dropped_events;
	local_t				committing;
	local_t				commits;
	local_t				pages_touched;
	local_t				pages_read;
	long				last_pages_touch;
	size_t				shortest_full;
	unsigned long			read;
	unsigned long			read_bytes;
	rb_time_t			write_stamp;
	rb_time_t			before_stamp;
	u64				event_stamp[MAX_NEST];
	u64				read_stamp;
	/* ring buffer pages to update, > 0 to add, < 0 to remove */
	long				nr_pages_to_update;
	struct list_head		new_pages; /* new pages to add */
	struct work_struct		update_pages_work;
	struct completion		update_done;

	struct rb_irq_work		irq_work;
};

struct trace_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

	struct hlist_node		node;
	u64				(*clock)(void);

	struct rb_irq_work		irq_work;
	bool				time_stamp_abs;
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	unsigned long			next_event;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
	u64				page_stamp;
	struct ring_buffer_event	*event;
	int				missed_events;
};

#ifdef RB_TIME_32
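
/*
 * On 32-bit machines a 64-bit cmpxchg is expensive or unavailable, so the
 * 64-bit time stamp is stored as two 30-bit halves (top and bottom), each
 * tagged with a 2-bit sequence counter. A reader re-reads the counter to
 * detect a torn update; if the two halves disagree, the read is reported
 * as a failure and the caller falls back to reading the clock directly.
 * The helpers below implement the split read, set and cmpxchg operations.
 */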
595#define RB_TIME_SHIFT 30
596#define RB_TIME_VAL_MASK ((1 << RB_TIME_SHIFT) - 1)
597
598static inline int rb_time_cnt(unsigned long val)
599{
600 return (val >> RB_TIME_SHIFT) & 3;
601}
602
603static inline u64 rb_time_val(unsigned long top, unsigned long bottom)
604{
605 u64 val;
606
607 val = top & RB_TIME_VAL_MASK;
608 val <<= RB_TIME_SHIFT;
609 val |= bottom & RB_TIME_VAL_MASK;
610
611 return val;
612}
613
614static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
615{
616 unsigned long top, bottom;
617 unsigned long c;
618
619
620
621
622
623
624 do {
625 c = local_read(&t->cnt);
626 top = local_read(&t->top);
627 bottom = local_read(&t->bottom);
628 } while (c != local_read(&t->cnt));
629
630 *cnt = rb_time_cnt(top);
631
632
633 if (*cnt != rb_time_cnt(bottom))
634 return false;
635
636 *ret = rb_time_val(top, bottom);
637 return true;
638}
639
640static bool rb_time_read(rb_time_t *t, u64 *ret)
641{
642 unsigned long cnt;
643
644 return __rb_time_read(t, ret, &cnt);
645}
646
647static inline unsigned long rb_time_val_cnt(unsigned long val, unsigned long cnt)
648{
649 return (val & RB_TIME_VAL_MASK) | ((cnt & 3) << RB_TIME_SHIFT);
650}
651
652static inline void rb_time_split(u64 val, unsigned long *top, unsigned long *bottom)
653{
654 *top = (unsigned long)((val >> RB_TIME_SHIFT) & RB_TIME_VAL_MASK);
655 *bottom = (unsigned long)(val & RB_TIME_VAL_MASK);
656}
657
658static inline void rb_time_val_set(local_t *t, unsigned long val, unsigned long cnt)
659{
660 val = rb_time_val_cnt(val, cnt);
661 local_set(t, val);
662}
663
664static void rb_time_set(rb_time_t *t, u64 val)
665{
666 unsigned long cnt, top, bottom;
667
668 rb_time_split(val, &top, &bottom);
669
670
671 do {
672 cnt = local_inc_return(&t->cnt);
673 rb_time_val_set(&t->top, top, cnt);
674 rb_time_val_set(&t->bottom, bottom, cnt);
675 } while (cnt != local_read(&t->cnt));
676}
677
678static inline bool
679rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
680{
681 unsigned long ret;
682
683 ret = local_cmpxchg(l, expect, set);
684 return ret == expect;
685}
686
687static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
688{
689 unsigned long cnt, top, bottom;
690 unsigned long cnt2, top2, bottom2;
691 u64 val;
692
693
694 if (!__rb_time_read(t, &val, &cnt2))
695 return false;
696
697 if (val != expect)
698 return false;
699
700 cnt = local_read(&t->cnt);
701 if ((cnt & 3) != cnt2)
702 return false;
703
704 cnt2 = cnt + 1;
705
706 rb_time_split(val, &top, &bottom);
707 top = rb_time_val_cnt(top, cnt);
708 bottom = rb_time_val_cnt(bottom, cnt);
709
710 rb_time_split(set, &top2, &bottom2);
711 top2 = rb_time_val_cnt(top2, cnt2);
712 bottom2 = rb_time_val_cnt(bottom2, cnt2);
713
714 if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2))
715 return false;
716 if (!rb_time_read_cmpxchg(&t->top, top, top2))
717 return false;
718 if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2))
719 return false;
720 return true;
721}
722
723#else
724
725
726
727static inline bool rb_time_read(rb_time_t *t, u64 *ret)
728{
729 *ret = local64_read(&t->time);
730 return true;
731}
732static void rb_time_set(rb_time_t *t, u64 val)
733{
734 local64_set(&t->time, val);
735}
736
737static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
738{
739 u64 val;
740 val = local64_cmpxchg(&t->time, expect, set);
741 return val == expect;
742}
743#endif
744
745
746
747
748
749
750
751#ifdef RB_VERIFY_EVENT
752static struct list_head *rb_list_head(struct list_head *list);
753static void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
754 void *event)
755{
756 struct buffer_page *page = cpu_buffer->commit_page;
757 struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page);
758 struct list_head *next;
759 long commit, write;
760 unsigned long addr = (unsigned long)event;
761 bool done = false;
762 int stop = 0;
763
764
765 do {
766 if (page == tail_page || WARN_ON_ONCE(stop++ > 100))
767 done = true;
768 commit = local_read(&page->page->commit);
769 write = local_read(&page->write);
770 if (addr >= (unsigned long)&page->page->data[commit] &&
771 addr < (unsigned long)&page->page->data[write])
772 return;
773
774 next = rb_list_head(page->list.next);
775 page = list_entry(next, struct buffer_page, list);
776 } while (!done);
777 WARN_ON_ONCE(1);
778}
779#else
780static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
781 void *event)
782{
783}
784#endif
785
786
787static inline u64 rb_time_stamp(struct trace_buffer *buffer);
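
/**
 * ring_buffer_event_time_stamp - return the time stamp of an event
 * @buffer: The buffer that the event is on
 * @event: the event to get the time stamp of
 *
 * Must be called while the event is still being written (between the
 * reserve and the commit), as it reads the per-CPU event_stamp of the
 * current nesting level. If the event carries an absolute time stamp,
 * that value is returned directly. If the nesting is deeper than
 * MAX_NEST, the buffer's write_stamp (or the current clock as a last
 * resort) is returned instead.
 */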
806u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer,
807 struct ring_buffer_event *event)
808{
809 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()];
810 unsigned int nest;
811 u64 ts;
812
813
814 if (event->type_len == RINGBUF_TYPE_TIME_STAMP)
815 return rb_event_time_stamp(event);
816
817 nest = local_read(&cpu_buffer->committing);
818 verify_event(cpu_buffer, event);
819 if (WARN_ON_ONCE(!nest))
820 goto fail;
821
822
823 if (likely(--nest < MAX_NEST))
824 return cpu_buffer->event_stamp[nest];
825
826
827 WARN_ONCE(1, "nest (%d) greater than max", nest);
828
829 fail:
830
831 if (!rb_time_read(&cpu_buffer->write_stamp, &ts))
832
833 ts = rb_time_stamp(cpu_buffer->buffer);
834
835 return ts;
836}
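
/**
 * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages used by a per_cpu buffer of the ring buffer.
 */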
845size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
846{
847 return buffer->buffers[cpu]->nr_pages;
848}
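
/**
 * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
 * @buffer: The ring_buffer to get the number of pages from
 * @cpu: The cpu of the ring_buffer to get the number of pages from
 *
 * Returns the number of pages that have content in the ring buffer.
 */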
857size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
858{
859 size_t read;
860 size_t cnt;
861
862 read = local_read(&buffer->buffers[cpu]->pages_read);
863 cnt = local_read(&buffer->buffers[cpu]->pages_touched);
864
865 if (cnt < read) {
866 WARN_ON_ONCE(read > cnt + 1);
867 return 0;
868 }
869
870 return cnt - read;
871}
872
873
874
875
876
877
878
879static void rb_wake_up_waiters(struct irq_work *work)
880{
881 struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
882
883 wake_up_all(&rbwork->waiters);
884 if (rbwork->wakeup_full) {
885 rbwork->wakeup_full = false;
886 wake_up_all(&rbwork->full_waiters);
887 }
888}
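
/**
 * ring_buffer_wait - wait for input to the ring buffer
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @full: wait until the percentage of pages are available,
 *	  if @cpu != RING_BUFFER_ALL_CPUS
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 */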
900int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
901{
902 struct ring_buffer_per_cpu *cpu_buffer;
903 DEFINE_WAIT(wait);
904 struct rb_irq_work *work;
905 int ret = 0;
906
907
908
909
910
911
912 if (cpu == RING_BUFFER_ALL_CPUS) {
913 work = &buffer->irq_work;
914
915 full = 0;
916 } else {
917 if (!cpumask_test_cpu(cpu, buffer->cpumask))
918 return -ENODEV;
919 cpu_buffer = buffer->buffers[cpu];
920 work = &cpu_buffer->irq_work;
921 }
922
923
924 while (true) {
925 if (full)
926 prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
927 else
928 prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950 if (full)
951 work->full_waiters_pending = true;
952 else
953 work->waiters_pending = true;
954
955 if (signal_pending(current)) {
956 ret = -EINTR;
957 break;
958 }
959
960 if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
961 break;
962
963 if (cpu != RING_BUFFER_ALL_CPUS &&
964 !ring_buffer_empty_cpu(buffer, cpu)) {
965 unsigned long flags;
966 bool pagebusy;
967 size_t nr_pages;
968 size_t dirty;
969
970 if (!full)
971 break;
972
973 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
974 pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
975 nr_pages = cpu_buffer->nr_pages;
976 dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
977 if (!cpu_buffer->shortest_full ||
978 cpu_buffer->shortest_full < full)
979 cpu_buffer->shortest_full = full;
980 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
981 if (!pagebusy &&
982 (!nr_pages || (dirty * 100) > full * nr_pages))
983 break;
984 }
985
986 schedule();
987 }
988
989 if (full)
990 finish_wait(&work->full_waiters, &wait);
991 else
992 finish_wait(&work->waiters, &wait);
993
994 return ret;
995}
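
/**
 * ring_buffer_poll_wait - poll on buffer input
 * @buffer: buffer to wait on
 * @cpu: the cpu buffer to wait on
 * @filp: the file descriptor
 * @poll_table: The poll descriptor
 *
 * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
 * as data is added to any of the @buffer's cpu buffers. Otherwise
 * it will wait for data to be added to a specific cpu buffer.
 *
 * Returns EPOLLIN | EPOLLRDNORM if data exists in the buffers,
 * zero otherwise.
 */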
1011__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
1012 struct file *filp, poll_table *poll_table)
1013{
1014 struct ring_buffer_per_cpu *cpu_buffer;
1015 struct rb_irq_work *work;
1016
1017 if (cpu == RING_BUFFER_ALL_CPUS)
1018 work = &buffer->irq_work;
1019 else {
1020 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1021 return -EINVAL;
1022
1023 cpu_buffer = buffer->buffers[cpu];
1024 work = &cpu_buffer->irq_work;
1025 }
1026
1027 poll_wait(filp, &work->waiters, poll_table);
1028 work->waiters_pending = true;
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042 smp_mb();
1043
1044 if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
1045 (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
1046 return EPOLLIN | EPOLLRDNORM;
1047 return 0;
1048}
1049
1050
1051#define RB_WARN_ON(b, cond) \
1052 ({ \
1053 int _____ret = unlikely(cond); \
1054 if (_____ret) { \
1055 if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
1056 struct ring_buffer_per_cpu *__b = \
1057 (void *)b; \
1058 atomic_inc(&__b->buffer->record_disabled); \
1059 } else \
1060 atomic_inc(&b->record_disabled); \
1061 WARN_ON(1); \
1062 } \
1063 _____ret; \
1064 })
1065
1066
1067#define DEBUG_SHIFT 0
1068
1069static inline u64 rb_time_stamp(struct trace_buffer *buffer)
1070{
1071 u64 ts;
1072
1073
1074 if (IS_ENABLED(CONFIG_RETPOLINE) && likely(buffer->clock == trace_clock_local))
1075 ts = trace_clock_local();
1076 else
1077 ts = buffer->clock();
1078
1079
1080 return ts << DEBUG_SHIFT;
1081}
1082
1083u64 ring_buffer_time_stamp(struct trace_buffer *buffer)
1084{
1085 u64 time;
1086
1087 preempt_disable_notrace();
1088 time = rb_time_stamp(buffer);
1089 preempt_enable_notrace();
1090
1091 return time;
1092}
1093EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
1094
1095void ring_buffer_normalize_time_stamp(struct trace_buffer *buffer,
1096 int cpu, u64 *ts)
1097{
1098
1099 *ts >>= DEBUG_SHIFT;
1100}
1101EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
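
/*
 * Making the ring buffer lockless makes things tricky. Writes only happen
 * on the CPU that owns a per cpu buffer and only need to worry about
 * interrupts, but reads can happen on any CPU.
 *
 * The reader page is always off the ring buffer. When the reader finishes
 * with a page, it swaps its page for the current head page with a cmpxchg
 * on the "head page" pointer. Because buffer pages are cache-line aligned,
 * the two least significant bits of the list->next pointer that points to
 * the head page are free and are used as flags:
 *
 *  HEAD   - the page the reader will swap with when it is done reading
 *  UPDATE - the writer is in the middle of moving the head to the next page
 *
 * A writer that overruns the head page first changes HEAD to UPDATE, moves
 * the HEAD flag to the next page, and then clears UPDATE. The reader's
 * cmpxchg against the HEAD flag racing with these transitions is what keeps
 * the swap safe without a lock.
 */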
1172#define RB_PAGE_NORMAL 0UL
1173#define RB_PAGE_HEAD 1UL
1174#define RB_PAGE_UPDATE 2UL
1175
1176
1177#define RB_FLAG_MASK 3UL
1178
1179
1180#define RB_PAGE_MOVED 4UL
1181
1182
1183
1184
1185static struct list_head *rb_list_head(struct list_head *list)
1186{
1187 unsigned long val = (unsigned long)list;
1188
1189 return (struct list_head *)(val & ~RB_FLAG_MASK);
1190}
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200static inline int
1201rb_is_head_page(struct buffer_page *page, struct list_head *list)
1202{
1203 unsigned long val;
1204
1205 val = (unsigned long)list->next;
1206
1207 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
1208 return RB_PAGE_MOVED;
1209
1210 return val & RB_FLAG_MASK;
1211}
1212
1213
1214
1215
1216
1217
1218
1219
1220static bool rb_is_reader_page(struct buffer_page *page)
1221{
1222 struct list_head *list = page->list.prev;
1223
1224 return rb_list_head(list->next) != &page->list;
1225}
1226
1227
1228
1229
1230static void rb_set_list_to_head(struct list_head *list)
1231{
1232 unsigned long *ptr;
1233
1234 ptr = (unsigned long *)&list->next;
1235 *ptr |= RB_PAGE_HEAD;
1236 *ptr &= ~RB_PAGE_UPDATE;
1237}
1238
1239
1240
1241
1242static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
1243{
1244 struct buffer_page *head;
1245
1246 head = cpu_buffer->head_page;
1247 if (!head)
1248 return;
1249
1250
1251
1252
1253 rb_set_list_to_head(head->list.prev);
1254}
1255
1256static void rb_list_head_clear(struct list_head *list)
1257{
1258 unsigned long *ptr = (unsigned long *)&list->next;
1259
1260 *ptr &= ~RB_FLAG_MASK;
1261}
1262
1263
1264
1265
1266static void
1267rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
1268{
1269 struct list_head *hd;
1270
1271
1272 rb_list_head_clear(cpu_buffer->pages);
1273
1274 list_for_each(hd, cpu_buffer->pages)
1275 rb_list_head_clear(hd);
1276}
1277
1278static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
1279 struct buffer_page *head,
1280 struct buffer_page *prev,
1281 int old_flag, int new_flag)
1282{
1283 struct list_head *list;
1284 unsigned long val = (unsigned long)&head->list;
1285 unsigned long ret;
1286
1287 list = &prev->list;
1288
1289 val &= ~RB_FLAG_MASK;
1290
1291 ret = cmpxchg((unsigned long *)&list->next,
1292 val | old_flag, val | new_flag);
1293
1294
1295 if ((ret & ~RB_FLAG_MASK) != val)
1296 return RB_PAGE_MOVED;
1297
1298 return ret & RB_FLAG_MASK;
1299}
1300
1301static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
1302 struct buffer_page *head,
1303 struct buffer_page *prev,
1304 int old_flag)
1305{
1306 return rb_head_page_set(cpu_buffer, head, prev,
1307 old_flag, RB_PAGE_UPDATE);
1308}
1309
1310static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
1311 struct buffer_page *head,
1312 struct buffer_page *prev,
1313 int old_flag)
1314{
1315 return rb_head_page_set(cpu_buffer, head, prev,
1316 old_flag, RB_PAGE_HEAD);
1317}
1318
1319static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
1320 struct buffer_page *head,
1321 struct buffer_page *prev,
1322 int old_flag)
1323{
1324 return rb_head_page_set(cpu_buffer, head, prev,
1325 old_flag, RB_PAGE_NORMAL);
1326}
1327
1328static inline void rb_inc_page(struct buffer_page **bpage)
1329{
1330 struct list_head *p = rb_list_head((*bpage)->list.next);
1331
1332 *bpage = list_entry(p, struct buffer_page, list);
1333}
1334
1335static struct buffer_page *
1336rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
1337{
1338 struct buffer_page *head;
1339 struct buffer_page *page;
1340 struct list_head *list;
1341 int i;
1342
1343 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
1344 return NULL;
1345
1346
1347 list = cpu_buffer->pages;
1348 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
1349 return NULL;
1350
1351 page = head = cpu_buffer->head_page;
1352
1353
1354
1355
1356
1357
1358 for (i = 0; i < 3; i++) {
1359 do {
1360 if (rb_is_head_page(page, page->list.prev)) {
1361 cpu_buffer->head_page = page;
1362 return page;
1363 }
1364 rb_inc_page(&page);
1365 } while (page != head);
1366 }
1367
1368 RB_WARN_ON(cpu_buffer, 1);
1369
1370 return NULL;
1371}
1372
1373static int rb_head_page_replace(struct buffer_page *old,
1374 struct buffer_page *new)
1375{
1376 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
1377 unsigned long val;
1378 unsigned long ret;
1379
1380 val = *ptr & ~RB_FLAG_MASK;
1381 val |= RB_PAGE_HEAD;
1382
1383 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
1384
1385 return ret == val;
1386}
1387
1388
1389
1390
1391static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
1392 struct buffer_page *tail_page,
1393 struct buffer_page *next_page)
1394{
1395 unsigned long old_entries;
1396 unsigned long old_write;
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
1408 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
1409
1410 local_inc(&cpu_buffer->pages_touched);
1411
1412
1413
1414
1415 barrier();
1416
1417
1418
1419
1420
1421
1422 if (tail_page == READ_ONCE(cpu_buffer->tail_page)) {
1423
1424 unsigned long val = old_write & ~RB_WRITE_MASK;
1425 unsigned long eval = old_entries & ~RB_WRITE_MASK;
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437 (void)local_cmpxchg(&next_page->write, old_write, val);
1438 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
1439
1440
1441
1442
1443
1444
1445 local_set(&next_page->page->commit, 0);
1446
1447
1448 (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
1449 }
1450}
1451
1452static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
1453 struct buffer_page *bpage)
1454{
1455 unsigned long val = (unsigned long)bpage;
1456
1457 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
1458 return 1;
1459
1460 return 0;
1461}
1462
1463
1464
1465
1466static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
1467 struct list_head *list)
1468{
1469 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
1470 return 1;
1471 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
1472 return 1;
1473 return 0;
1474}
1475
1476
1477
1478
1479
1480
1481
1482
1483static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
1484{
1485 struct list_head *head = cpu_buffer->pages;
1486 struct buffer_page *bpage, *tmp;
1487
1488
1489 if (cpu_buffer->head_page)
1490 rb_set_head_page(cpu_buffer);
1491
1492 rb_head_page_deactivate(cpu_buffer);
1493
1494 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
1495 return -1;
1496 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
1497 return -1;
1498
1499 if (rb_check_list(cpu_buffer, head))
1500 return -1;
1501
1502 list_for_each_entry_safe(bpage, tmp, head, list) {
1503 if (RB_WARN_ON(cpu_buffer,
1504 bpage->list.next->prev != &bpage->list))
1505 return -1;
1506 if (RB_WARN_ON(cpu_buffer,
1507 bpage->list.prev->next != &bpage->list))
1508 return -1;
1509 if (rb_check_list(cpu_buffer, &bpage->list))
1510 return -1;
1511 }
1512
1513 rb_head_page_activate(cpu_buffer);
1514
1515 return 0;
1516}
1517
1518static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1519 long nr_pages, struct list_head *pages)
1520{
1521 struct buffer_page *bpage, *tmp;
1522 bool user_thread = current->mm != NULL;
1523 gfp_t mflags;
1524 long i;
1525
1526
1527
1528
1529
1530
1531
1532
1533 i = si_mem_available();
1534 if (i < nr_pages)
1535 return -ENOMEM;
1536
1537
1538
1539
1540
1541
1542 mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553 if (user_thread)
1554 set_current_oom_origin();
1555 for (i = 0; i < nr_pages; i++) {
1556 struct page *page;
1557
1558 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1559 mflags, cpu_to_node(cpu_buffer->cpu));
1560 if (!bpage)
1561 goto free_pages;
1562
1563 rb_check_bpage(cpu_buffer, bpage);
1564
1565 list_add(&bpage->list, pages);
1566
1567 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu), mflags, 0);
1568 if (!page)
1569 goto free_pages;
1570 bpage->page = page_address(page);
1571 rb_init_page(bpage->page);
1572
1573 if (user_thread && fatal_signal_pending(current))
1574 goto free_pages;
1575 }
1576 if (user_thread)
1577 clear_current_oom_origin();
1578
1579 return 0;
1580
1581free_pages:
1582 list_for_each_entry_safe(bpage, tmp, pages, list) {
1583 list_del_init(&bpage->list);
1584 free_buffer_page(bpage);
1585 }
1586 if (user_thread)
1587 clear_current_oom_origin();
1588
1589 return -ENOMEM;
1590}
1591
1592static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1593 unsigned long nr_pages)
1594{
1595 LIST_HEAD(pages);
1596
1597 WARN_ON(!nr_pages);
1598
1599 if (__rb_allocate_pages(cpu_buffer, nr_pages, &pages))
1600 return -ENOMEM;
1601
1602
1603
1604
1605
1606
1607 cpu_buffer->pages = pages.next;
1608 list_del(&pages);
1609
1610 cpu_buffer->nr_pages = nr_pages;
1611
1612 rb_check_pages(cpu_buffer);
1613
1614 return 0;
1615}
1616
1617static struct ring_buffer_per_cpu *
1618rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
1619{
1620 struct ring_buffer_per_cpu *cpu_buffer;
1621 struct buffer_page *bpage;
1622 struct page *page;
1623 int ret;
1624
1625 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1626 GFP_KERNEL, cpu_to_node(cpu));
1627 if (!cpu_buffer)
1628 return NULL;
1629
1630 cpu_buffer->cpu = cpu;
1631 cpu_buffer->buffer = buffer;
1632 raw_spin_lock_init(&cpu_buffer->reader_lock);
1633 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1634 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1635 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1636 init_completion(&cpu_buffer->update_done);
1637 init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
1638 init_waitqueue_head(&cpu_buffer->irq_work.waiters);
1639 init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
1640
1641 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1642 GFP_KERNEL, cpu_to_node(cpu));
1643 if (!bpage)
1644 goto fail_free_buffer;
1645
1646 rb_check_bpage(cpu_buffer, bpage);
1647
1648 cpu_buffer->reader_page = bpage;
1649 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1650 if (!page)
1651 goto fail_free_reader;
1652 bpage->page = page_address(page);
1653 rb_init_page(bpage->page);
1654
1655 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1656 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1657
1658 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1659 if (ret < 0)
1660 goto fail_free_reader;
1661
1662 cpu_buffer->head_page
1663 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1664 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1665
1666 rb_head_page_activate(cpu_buffer);
1667
1668 return cpu_buffer;
1669
1670 fail_free_reader:
1671 free_buffer_page(cpu_buffer->reader_page);
1672
1673 fail_free_buffer:
1674 kfree(cpu_buffer);
1675 return NULL;
1676}
1677
1678static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1679{
1680 struct list_head *head = cpu_buffer->pages;
1681 struct buffer_page *bpage, *tmp;
1682
1683 free_buffer_page(cpu_buffer->reader_page);
1684
1685 rb_head_page_deactivate(cpu_buffer);
1686
1687 if (head) {
1688 list_for_each_entry_safe(bpage, tmp, head, list) {
1689 list_del_init(&bpage->list);
1690 free_buffer_page(bpage);
1691 }
1692 bpage = list_entry(head, struct buffer_page, list);
1693 free_buffer_page(bpage);
1694 }
1695
1696 kfree(cpu_buffer);
1697}
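
/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 * @key: ring buffer reader_lock_key.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */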
1710struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1711 struct lock_class_key *key)
1712{
1713 struct trace_buffer *buffer;
1714 long nr_pages;
1715 int bsize;
1716 int cpu;
1717 int ret;
1718
1719
1720 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1721 GFP_KERNEL);
1722 if (!buffer)
1723 return NULL;
1724
1725 if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1726 goto fail_free_buffer;
1727
1728 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1729 buffer->flags = flags;
1730 buffer->clock = trace_clock_local;
1731 buffer->reader_lock_key = key;
1732
1733 init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
1734 init_waitqueue_head(&buffer->irq_work.waiters);
1735
1736
1737 if (nr_pages < 2)
1738 nr_pages = 2;
1739
1740 buffer->cpus = nr_cpu_ids;
1741
1742 bsize = sizeof(void *) * nr_cpu_ids;
1743 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1744 GFP_KERNEL);
1745 if (!buffer->buffers)
1746 goto fail_free_cpumask;
1747
1748 cpu = raw_smp_processor_id();
1749 cpumask_set_cpu(cpu, buffer->cpumask);
1750 buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1751 if (!buffer->buffers[cpu])
1752 goto fail_free_buffers;
1753
1754 ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1755 if (ret < 0)
1756 goto fail_free_buffers;
1757
1758 mutex_init(&buffer->mutex);
1759
1760 return buffer;
1761
1762 fail_free_buffers:
1763 for_each_buffer_cpu(buffer, cpu) {
1764 if (buffer->buffers[cpu])
1765 rb_free_cpu_buffer(buffer->buffers[cpu]);
1766 }
1767 kfree(buffer->buffers);
1768
1769 fail_free_cpumask:
1770 free_cpumask_var(buffer->cpumask);
1771
1772 fail_free_buffer:
1773 kfree(buffer);
1774 return NULL;
1775}
1776EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
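
/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */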
1782void
1783ring_buffer_free(struct trace_buffer *buffer)
1784{
1785 int cpu;
1786
1787 cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
1788
1789 for_each_buffer_cpu(buffer, cpu)
1790 rb_free_cpu_buffer(buffer->buffers[cpu]);
1791
1792 kfree(buffer->buffers);
1793 free_cpumask_var(buffer->cpumask);
1794
1795 kfree(buffer);
1796}
1797EXPORT_SYMBOL_GPL(ring_buffer_free);
1798
1799void ring_buffer_set_clock(struct trace_buffer *buffer,
1800 u64 (*clock)(void))
1801{
1802 buffer->clock = clock;
1803}
1804
1805void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs)
1806{
1807 buffer->time_stamp_abs = abs;
1808}
1809
1810bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer)
1811{
1812 return buffer->time_stamp_abs;
1813}
1814
1815static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1816
1817static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1818{
1819 return local_read(&bpage->entries) & RB_WRITE_MASK;
1820}
1821
1822static inline unsigned long rb_page_write(struct buffer_page *bpage)
1823{
1824 return local_read(&bpage->write) & RB_WRITE_MASK;
1825}
1826
1827static int
1828rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
1829{
1830 struct list_head *tail_page, *to_remove, *next_page;
1831 struct buffer_page *to_remove_page, *tmp_iter_page;
1832 struct buffer_page *last_page, *first_page;
1833 unsigned long nr_removed;
1834 unsigned long head_bit;
1835 int page_entries;
1836
1837 head_bit = 0;
1838
1839 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1840 atomic_inc(&cpu_buffer->record_disabled);
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850 tail_page = &cpu_buffer->tail_page->list;
1851
1852
1853
1854
1855
1856 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1857 tail_page = rb_list_head(tail_page->next);
1858 to_remove = tail_page;
1859
1860
1861 first_page = list_entry(rb_list_head(to_remove->next),
1862 struct buffer_page, list);
1863
1864 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1865 to_remove = rb_list_head(to_remove)->next;
1866 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1867 }
1868
1869 next_page = rb_list_head(to_remove)->next;
1870
1871
1872
1873
1874
1875
1876 tail_page->next = (struct list_head *)((unsigned long)next_page |
1877 head_bit);
1878 next_page = rb_list_head(next_page);
1879 next_page->prev = tail_page;
1880
1881
1882 cpu_buffer->pages = next_page;
1883
1884
1885 if (head_bit)
1886 cpu_buffer->head_page = list_entry(next_page,
1887 struct buffer_page, list);
1888
1889
1890
1891
1892
1893 cpu_buffer->read = 0;
1894
1895
1896 atomic_dec(&cpu_buffer->record_disabled);
1897 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1898
1899 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1900
1901
1902 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1903 list);
1904 tmp_iter_page = first_page;
1905
1906 do {
1907 cond_resched();
1908
1909 to_remove_page = tmp_iter_page;
1910 rb_inc_page(&tmp_iter_page);
1911
1912
1913 page_entries = rb_page_entries(to_remove_page);
1914 if (page_entries) {
1915
1916
1917
1918
1919
1920
1921 local_add(page_entries, &cpu_buffer->overrun);
1922 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1923 }
1924
1925
1926
1927
1928
1929 free_buffer_page(to_remove_page);
1930 nr_removed--;
1931
1932 } while (to_remove_page != last_page);
1933
1934 RB_WARN_ON(cpu_buffer, nr_removed);
1935
1936 return nr_removed == 0;
1937}
1938
1939static int
1940rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1941{
1942 struct list_head *pages = &cpu_buffer->new_pages;
1943 int retries, success;
1944
1945 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960 retries = 10;
1961 success = 0;
1962 while (retries--) {
1963 struct list_head *head_page, *prev_page, *r;
1964 struct list_head *last_page, *first_page;
1965 struct list_head *head_page_with_bit;
1966
1967 head_page = &rb_set_head_page(cpu_buffer)->list;
1968 if (!head_page)
1969 break;
1970 prev_page = head_page->prev;
1971
1972 first_page = pages->next;
1973 last_page = pages->prev;
1974
1975 head_page_with_bit = (struct list_head *)
1976 ((unsigned long)head_page | RB_PAGE_HEAD);
1977
1978 last_page->next = head_page_with_bit;
1979 first_page->prev = prev_page;
1980
1981 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1982
1983 if (r == head_page_with_bit) {
1984
1985
1986
1987
1988
1989 head_page->prev = last_page;
1990 success = 1;
1991 break;
1992 }
1993 }
1994
1995 if (success)
1996 INIT_LIST_HEAD(pages);
1997
1998
1999
2000
2001 RB_WARN_ON(cpu_buffer, !success);
2002 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
2003
2004
2005 if (!success) {
2006 struct buffer_page *bpage, *tmp;
2007 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
2008 list) {
2009 list_del_init(&bpage->list);
2010 free_buffer_page(bpage);
2011 }
2012 }
2013 return success;
2014}
2015
2016static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
2017{
2018 int success;
2019
2020 if (cpu_buffer->nr_pages_to_update > 0)
2021 success = rb_insert_pages(cpu_buffer);
2022 else
2023 success = rb_remove_pages(cpu_buffer,
2024 -cpu_buffer->nr_pages_to_update);
2025
2026 if (success)
2027 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
2028}
2029
2030static void update_pages_handler(struct work_struct *work)
2031{
2032 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
2033 struct ring_buffer_per_cpu, update_pages_work);
2034 rb_update_pages(cpu_buffer);
2035 complete(&cpu_buffer->update_done);
2036}
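
/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns 0 on success and < 0 on failure.
 */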
2048int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
2049 int cpu_id)
2050{
2051 struct ring_buffer_per_cpu *cpu_buffer;
2052 unsigned long nr_pages;
2053 int cpu, err;
2054
2055
2056
2057
2058 if (!buffer)
2059 return 0;
2060
2061
2062 if (cpu_id != RING_BUFFER_ALL_CPUS &&
2063 !cpumask_test_cpu(cpu_id, buffer->cpumask))
2064 return 0;
2065
2066 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
2067
2068
2069 if (nr_pages < 2)
2070 nr_pages = 2;
2071
2072
2073 mutex_lock(&buffer->mutex);
2074
2075
2076 if (cpu_id == RING_BUFFER_ALL_CPUS) {
2077
2078
2079
2080
2081
2082 for_each_buffer_cpu(buffer, cpu) {
2083 cpu_buffer = buffer->buffers[cpu];
2084 if (atomic_read(&cpu_buffer->resize_disabled)) {
2085 err = -EBUSY;
2086 goto out_err_unlock;
2087 }
2088 }
2089
2090
2091 for_each_buffer_cpu(buffer, cpu) {
2092 cpu_buffer = buffer->buffers[cpu];
2093
2094 cpu_buffer->nr_pages_to_update = nr_pages -
2095 cpu_buffer->nr_pages;
2096
2097
2098
2099 if (cpu_buffer->nr_pages_to_update <= 0)
2100 continue;
2101
2102
2103
2104
2105 INIT_LIST_HEAD(&cpu_buffer->new_pages);
2106 if (__rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
2107 &cpu_buffer->new_pages)) {
2108
2109 err = -ENOMEM;
2110 goto out_err;
2111 }
2112 }
2113
2114 cpus_read_lock();
2115
2116
2117
2118
2119
2120 for_each_buffer_cpu(buffer, cpu) {
2121 cpu_buffer = buffer->buffers[cpu];
2122 if (!cpu_buffer->nr_pages_to_update)
2123 continue;
2124
2125
2126 if (!cpu_online(cpu)) {
2127 rb_update_pages(cpu_buffer);
2128 cpu_buffer->nr_pages_to_update = 0;
2129 } else {
2130 schedule_work_on(cpu,
2131 &cpu_buffer->update_pages_work);
2132 }
2133 }
2134
2135
2136 for_each_buffer_cpu(buffer, cpu) {
2137 cpu_buffer = buffer->buffers[cpu];
2138 if (!cpu_buffer->nr_pages_to_update)
2139 continue;
2140
2141 if (cpu_online(cpu))
2142 wait_for_completion(&cpu_buffer->update_done);
2143 cpu_buffer->nr_pages_to_update = 0;
2144 }
2145
2146 cpus_read_unlock();
2147 } else {
2148 cpu_buffer = buffer->buffers[cpu_id];
2149
2150 if (nr_pages == cpu_buffer->nr_pages)
2151 goto out;
2152
2153
2154
2155
2156
2157
2158 if (atomic_read(&cpu_buffer->resize_disabled)) {
2159 err = -EBUSY;
2160 goto out_err_unlock;
2161 }
2162
2163 cpu_buffer->nr_pages_to_update = nr_pages -
2164 cpu_buffer->nr_pages;
2165
2166 INIT_LIST_HEAD(&cpu_buffer->new_pages);
2167 if (cpu_buffer->nr_pages_to_update > 0 &&
2168 __rb_allocate_pages(cpu_buffer, cpu_buffer->nr_pages_to_update,
2169 &cpu_buffer->new_pages)) {
2170 err = -ENOMEM;
2171 goto out_err;
2172 }
2173
2174 cpus_read_lock();
2175
2176
2177 if (!cpu_online(cpu_id))
2178 rb_update_pages(cpu_buffer);
2179 else {
2180 schedule_work_on(cpu_id,
2181 &cpu_buffer->update_pages_work);
2182 wait_for_completion(&cpu_buffer->update_done);
2183 }
2184
2185 cpu_buffer->nr_pages_to_update = 0;
2186 cpus_read_unlock();
2187 }
2188
2189 out:
2190
2191
2192
2193
2194
2195
2196
2197 if (atomic_read(&buffer->record_disabled)) {
2198 atomic_inc(&buffer->record_disabled);
2199
2200
2201
2202
2203
2204
2205 synchronize_rcu();
2206 for_each_buffer_cpu(buffer, cpu) {
2207 cpu_buffer = buffer->buffers[cpu];
2208 rb_check_pages(cpu_buffer);
2209 }
2210 atomic_dec(&buffer->record_disabled);
2211 }
2212
2213 mutex_unlock(&buffer->mutex);
2214 return 0;
2215
2216 out_err:
2217 for_each_buffer_cpu(buffer, cpu) {
2218 struct buffer_page *bpage, *tmp;
2219
2220 cpu_buffer = buffer->buffers[cpu];
2221 cpu_buffer->nr_pages_to_update = 0;
2222
2223 if (list_empty(&cpu_buffer->new_pages))
2224 continue;
2225
2226 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
2227 list) {
2228 list_del_init(&bpage->list);
2229 free_buffer_page(bpage);
2230 }
2231 }
2232 out_err_unlock:
2233 mutex_unlock(&buffer->mutex);
2234 return err;
2235}
2236EXPORT_SYMBOL_GPL(ring_buffer_resize);
2237
2238void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val)
2239{
2240 mutex_lock(&buffer->mutex);
2241 if (val)
2242 buffer->flags |= RB_FL_OVERWRITE;
2243 else
2244 buffer->flags &= ~RB_FL_OVERWRITE;
2245 mutex_unlock(&buffer->mutex);
2246}
2247EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
2248
2249static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
2250{
2251 return bpage->page->data + index;
2252}
2253
2254static __always_inline struct ring_buffer_event *
2255rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
2256{
2257 return __rb_page_index(cpu_buffer->reader_page,
2258 cpu_buffer->reader_page->read);
2259}
2260
2261static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
2262{
2263 return local_read(&bpage->page->commit);
2264}
2265
2266static struct ring_buffer_event *
2267rb_iter_head_event(struct ring_buffer_iter *iter)
2268{
2269 struct ring_buffer_event *event;
2270 struct buffer_page *iter_head_page = iter->head_page;
2271 unsigned long commit;
2272 unsigned length;
2273
2274 if (iter->head != iter->next_event)
2275 return iter->event;
2276
2277
2278
2279
2280
2281
2282 commit = rb_page_commit(iter_head_page);
2283 smp_rmb();
2284 event = __rb_page_index(iter_head_page, iter->head);
2285 length = rb_event_length(event);
2286
2287
2288
2289
2290
2291 barrier();
2292
2293 if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
2294
2295 goto reset;
2296
2297 memcpy(iter->event, event, length);
2298
2299
2300
2301
2302 smp_rmb();
2303
2304
2305 if (iter->page_stamp != iter_head_page->page->time_stamp ||
2306 commit > rb_page_commit(iter_head_page))
2307 goto reset;
2308
2309 iter->next_event = iter->head + length;
2310 return iter->event;
2311 reset:
2312
2313 iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
2314 iter->head = 0;
2315 iter->next_event = 0;
2316 iter->missed_events = 1;
2317 return NULL;
2318}
2319
2320
2321static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
2322{
2323 return rb_page_commit(bpage);
2324}
2325
2326static __always_inline unsigned
2327rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
2328{
2329 return rb_page_commit(cpu_buffer->commit_page);
2330}
2331
2332static __always_inline unsigned
2333rb_event_index(struct ring_buffer_event *event)
2334{
2335 unsigned long addr = (unsigned long)event;
2336
2337 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
2338}
2339
2340static void rb_inc_iter(struct ring_buffer_iter *iter)
2341{
2342 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2343
2344
2345
2346
2347
2348
2349
2350 if (iter->head_page == cpu_buffer->reader_page)
2351 iter->head_page = rb_set_head_page(cpu_buffer);
2352 else
2353 rb_inc_page(&iter->head_page);
2354
2355 iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
2356 iter->head = 0;
2357 iter->next_event = 0;
2358}
2359
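
/*
 * rb_handle_head_page - writer hit the head page
 *
 * Returns: +1 to retry page
 *           0 to continue
 *          -1 on error
 */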
2367static int
2368rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
2369 struct buffer_page *tail_page,
2370 struct buffer_page *next_page)
2371{
2372 struct buffer_page *new_head;
2373 int entries;
2374 int type;
2375 int ret;
2376
2377 entries = rb_page_entries(next_page);
2378
2379
2380
2381
2382
2383
2384 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
2385 RB_PAGE_HEAD);
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398 switch (type) {
2399 case RB_PAGE_HEAD:
2400
2401
2402
2403
2404
2405 local_add(entries, &cpu_buffer->overrun);
2406 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
2407
2408
2409
2410
2411
2412
2413
2414 break;
2415
2416 case RB_PAGE_UPDATE:
2417
2418
2419
2420
2421 break;
2422 case RB_PAGE_NORMAL:
2423
2424
2425
2426
2427
2428 return 1;
2429 case RB_PAGE_MOVED:
2430
2431
2432
2433
2434
2435 return 1;
2436 default:
2437 RB_WARN_ON(cpu_buffer, 1);
2438 return -1;
2439 }
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455 new_head = next_page;
2456 rb_inc_page(&new_head);
2457
2458 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
2459 RB_PAGE_NORMAL);
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469 switch (ret) {
2470 case RB_PAGE_HEAD:
2471 case RB_PAGE_NORMAL:
2472
2473 break;
2474 default:
2475 RB_WARN_ON(cpu_buffer, 1);
2476 return -1;
2477 }
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489 if (ret == RB_PAGE_NORMAL) {
2490 struct buffer_page *buffer_tail_page;
2491
2492 buffer_tail_page = READ_ONCE(cpu_buffer->tail_page);
2493
2494
2495
2496
2497 if (buffer_tail_page != tail_page &&
2498 buffer_tail_page != next_page)
2499 rb_head_page_set_normal(cpu_buffer, new_head,
2500 next_page,
2501 RB_PAGE_HEAD);
2502 }
2503
2504
2505
2506
2507
2508
2509 if (type == RB_PAGE_HEAD) {
2510 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2511 tail_page,
2512 RB_PAGE_UPDATE);
2513 if (RB_WARN_ON(cpu_buffer,
2514 ret != RB_PAGE_UPDATE))
2515 return -1;
2516 }
2517
2518 return 0;
2519}
2520
2521static inline void
2522rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2523 unsigned long tail, struct rb_event_info *info)
2524{
2525 struct buffer_page *tail_page = info->tail_page;
2526 struct ring_buffer_event *event;
2527 unsigned long length = info->length;
2528
2529
2530
2531
2532
2533 if (tail >= BUF_PAGE_SIZE) {
2534
2535
2536
2537
2538
2539 if (tail == BUF_PAGE_SIZE)
2540 tail_page->real_end = 0;
2541
2542 local_sub(length, &tail_page->write);
2543 return;
2544 }
2545
2546 event = __rb_page_index(tail_page, tail);
2547
2548
2549 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2550
2551
2552
2553
2554
2555
2556 tail_page->real_end = tail;
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2570
2571
2572
2573 rb_event_set_padding(event);
2574
2575
2576 local_sub(length, &tail_page->write);
2577 return;
2578 }
2579
2580
2581 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2582 event->type_len = RINGBUF_TYPE_PADDING;
2583
2584 event->time_delta = 1;
2585
2586
2587 length = (tail + length) - BUF_PAGE_SIZE;
2588 local_sub(length, &tail_page->write);
2589}
2590
2591static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer);
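
/*
 * This is the slow path, force gcc not to inline it.
 */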
2596static noinline struct ring_buffer_event *
2597rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2598 unsigned long tail, struct rb_event_info *info)
2599{
2600 struct buffer_page *tail_page = info->tail_page;
2601 struct buffer_page *commit_page = cpu_buffer->commit_page;
2602 struct trace_buffer *buffer = cpu_buffer->buffer;
2603 struct buffer_page *next_page;
2604 int ret;
2605
2606 next_page = tail_page;
2607
2608 rb_inc_page(&next_page);
2609
2610
2611
2612
2613
2614
2615 if (unlikely(next_page == commit_page)) {
2616 local_inc(&cpu_buffer->commit_overrun);
2617 goto out_reset;
2618 }
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634 if (rb_is_head_page(next_page, &tail_page->list)) {
2635
2636
2637
2638
2639
2640 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2641
2642
2643
2644
2645 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2646 local_inc(&cpu_buffer->dropped_events);
2647 goto out_reset;
2648 }
2649
2650 ret = rb_handle_head_page(cpu_buffer,
2651 tail_page,
2652 next_page);
2653 if (ret < 0)
2654 goto out_reset;
2655 if (ret)
2656 goto out_again;
2657 } else {
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668 if (unlikely((cpu_buffer->commit_page !=
2669 cpu_buffer->tail_page) &&
2670 (cpu_buffer->commit_page ==
2671 cpu_buffer->reader_page))) {
2672 local_inc(&cpu_buffer->commit_overrun);
2673 goto out_reset;
2674 }
2675 }
2676 }
2677
2678 rb_tail_page_update(cpu_buffer, tail_page, next_page);
2679
2680 out_again:
2681
2682 rb_reset_tail(cpu_buffer, tail, info);
2683
2684
2685 rb_end_commit(cpu_buffer);
2686
2687 local_inc(&cpu_buffer->committing);
2688
2689
2690 return ERR_PTR(-EAGAIN);
2691
2692 out_reset:
2693
2694 rb_reset_tail(cpu_buffer, tail, info);
2695
2696 return NULL;
2697}
2698
2699
2700static struct ring_buffer_event *
2701rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
2702{
2703 if (abs)
2704 event->type_len = RINGBUF_TYPE_TIME_STAMP;
2705 else
2706 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
2707
2708
2709 if (abs || rb_event_index(event)) {
2710 event->time_delta = delta & TS_MASK;
2711 event->array[0] = delta >> TS_SHIFT;
2712 } else {
2713
2714 event->time_delta = 0;
2715 event->array[0] = 0;
2716 }
2717
2718 return skip_time_extend(event);
2719}
2720
2721#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2722static inline bool sched_clock_stable(void)
2723{
2724 return true;
2725}
2726#endif
2727
2728static void
2729rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2730 struct rb_event_info *info)
2731{
2732 u64 write_stamp;
2733
2734 WARN_ONCE(1, "Delta way too big! %llu ts=%llu before=%llu after=%llu write stamp=%llu\n%s",
2735 (unsigned long long)info->delta,
2736 (unsigned long long)info->ts,
2737 (unsigned long long)info->before,
2738 (unsigned long long)info->after,
2739 (unsigned long long)(rb_time_read(&cpu_buffer->write_stamp, &write_stamp) ? write_stamp : 0),
2740 sched_clock_stable() ? "" :
2741 "If you just came from a suspend/resume,\n"
2742 "please switch to the trace global clock:\n"
2743 " echo global > /sys/kernel/debug/tracing/trace_clock\n"
2744 "or add trace_clock=global to the kernel command line\n");
2745}
2746
2747static void rb_add_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
2748 struct ring_buffer_event **event,
2749 struct rb_event_info *info,
2750 u64 *delta,
2751 unsigned int *length)
2752{
2753 bool abs = info->add_timestamp &
2754 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE);
2755
2756 if (unlikely(info->delta > (1ULL << 59))) {
2757
2758 if (info->before == info->after && info->before > info->ts) {
2759
2760 static int once;
2761
2762
2763
2764
2765
2766 if (!once) {
2767 once++;
2768 pr_warn("Ring buffer clock went backwards: %llu -> %llu\n",
2769 info->before, info->ts);
2770 }
2771 } else
2772 rb_check_timestamp(cpu_buffer, info);
2773 if (!abs)
2774 info->delta = 0;
2775 }
2776 *event = rb_add_time_stamp(*event, info->delta, abs);
2777 *length -= RB_LEN_TIME_EXTEND;
2778 *delta = 0;
2779}
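
/**
 * rb_update_event - update event type and data
 * @cpu_buffer: The per cpu buffer of the @event
 * @event: the event to update
 * @info: The info to update the @event with (contains length and delta)
 *
 * Update the type and data fields of the @event. The length
 * of the event must already have been calculated (using
 * rb_calculate_event_length() above).
 */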
2792static void
2793rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
2794 struct ring_buffer_event *event,
2795 struct rb_event_info *info)
2796{
2797 unsigned length = info->length;
2798 u64 delta = info->delta;
2799 unsigned int nest = local_read(&cpu_buffer->committing) - 1;
2800
2801 if (!WARN_ON_ONCE(nest >= MAX_NEST))
2802 cpu_buffer->event_stamp[nest] = info->ts;
2803
2804
2805
2806
2807
2808 if (unlikely(info->add_timestamp))
2809 rb_add_timestamp(cpu_buffer, &event, info, &delta, &length);
2810
2811 event->time_delta = delta;
2812 length -= RB_EVNT_HDR_SIZE;
2813 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
2814 event->type_len = 0;
2815 event->array[0] = length;
2816 } else
2817 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
2818}
2819
2820static unsigned rb_calculate_event_length(unsigned length)
2821{
2822 struct ring_buffer_event event;
2823
2824
2825 if (!length)
2826 length++;
2827
2828 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2829 length += sizeof(event.array[0]);
2830
2831 length += RB_EVNT_HDR_SIZE;
2832 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846 if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
2847 length += RB_ALIGNMENT;
2848
2849 return length;
2850}
2851
2852static u64 rb_time_delta(struct ring_buffer_event *event)
2853{
2854 switch (event->type_len) {
2855 case RINGBUF_TYPE_PADDING:
2856 return 0;
2857
2858 case RINGBUF_TYPE_TIME_EXTEND:
2859 return rb_event_time_stamp(event);
2860
2861 case RINGBUF_TYPE_TIME_STAMP:
2862 return 0;
2863
2864 case RINGBUF_TYPE_DATA:
2865 return event->time_delta;
2866 default:
2867 return 0;
2868 }
2869}
2870
2871static inline int
2872rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2873 struct ring_buffer_event *event)
2874{
2875 unsigned long new_index, old_index;
2876 struct buffer_page *bpage;
2877 unsigned long index;
2878 unsigned long addr;
2879 u64 write_stamp;
2880 u64 delta;
2881
2882 new_index = rb_event_index(event);
2883 old_index = new_index + rb_event_ts_length(event);
2884 addr = (unsigned long)event;
2885 addr &= PAGE_MASK;
2886
2887 bpage = READ_ONCE(cpu_buffer->tail_page);
2888
2889 delta = rb_time_delta(event);
2890
2891 if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp))
2892 return 0;
2893
2894
2895 barrier();
2896
2897 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2898 unsigned long write_mask =
2899 local_read(&bpage->write) & ~RB_WRITE_MASK;
2900 unsigned long event_length = rb_event_length(event);
2901
2902
2903 if (!rb_time_cmpxchg(&cpu_buffer->write_stamp,
2904 write_stamp, write_stamp - delta))
2905 return 0;
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915 if (!delta)
2916 rb_time_set(&cpu_buffer->before_stamp, 0);
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932 old_index += write_mask;
2933 new_index += write_mask;
2934 index = local_cmpxchg(&bpage->write, old_index, new_index);
2935 if (index == old_index) {
2936
2937 local_sub(event_length, &cpu_buffer->entries_bytes);
2938 return 1;
2939 }
2940 }
2941
2942
2943 return 0;
2944}
2945
2946static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2947{
2948 local_inc(&cpu_buffer->committing);
2949 local_inc(&cpu_buffer->commits);
2950}
2951
2952static __always_inline void
2953rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
2954{
2955 unsigned long max_count;
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965 again:
2966 max_count = cpu_buffer->nr_pages * 100;
2967
2968 while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) {
2969 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
2970 return;
2971 if (RB_WARN_ON(cpu_buffer,
2972 rb_is_reader_page(cpu_buffer->tail_page)))
2973 return;
2974 local_set(&cpu_buffer->commit_page->page->commit,
2975 rb_page_write(cpu_buffer->commit_page));
2976 rb_inc_page(&cpu_buffer->commit_page);
2977
2978 barrier();
2979 }
2980 while (rb_commit_index(cpu_buffer) !=
2981 rb_page_write(cpu_buffer->commit_page)) {
2982
2983 local_set(&cpu_buffer->commit_page->page->commit,
2984 rb_page_write(cpu_buffer->commit_page));
2985 RB_WARN_ON(cpu_buffer,
2986 local_read(&cpu_buffer->commit_page->page->commit) &
2987 ~RB_WRITE_MASK);
2988 barrier();
2989 }
2990
2991
2992 barrier();
2993
2994
2995
2996
2997
2998
2999 if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)))
3000 goto again;
3001}
3002
3003static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
3004{
3005 unsigned long commits;
3006
3007 if (RB_WARN_ON(cpu_buffer,
3008 !local_read(&cpu_buffer->committing)))
3009 return;
3010
3011 again:
3012 commits = local_read(&cpu_buffer->commits);
3013
3014 barrier();
3015 if (local_read(&cpu_buffer->committing) == 1)
3016 rb_set_commit_to_write(cpu_buffer);
3017
3018 local_dec(&cpu_buffer->committing);
3019
3020
3021 barrier();
3022
3023
3024
3025
3026
3027
3028 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
3029 !local_read(&cpu_buffer->committing)) {
3030 local_inc(&cpu_buffer->committing);
3031 goto again;
3032 }
3033}
3034
3035static inline void rb_event_discard(struct ring_buffer_event *event)
3036{
3037 if (extended_time(event))
3038 event = skip_time_extend(event);
3039
3040
3041 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
3042 event->type_len = RINGBUF_TYPE_PADDING;
3043
3044 if (!event->time_delta)
3045 event->time_delta = 1;
3046}
3047
3048static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
3049 struct ring_buffer_event *event)
3050{
3051 local_inc(&cpu_buffer->entries);
3052 rb_end_commit(cpu_buffer);
3053}
3054
3055static __always_inline void
3056rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
3057{
3058 size_t nr_pages;
3059 size_t dirty;
3060 size_t full;
3061
3062 if (buffer->irq_work.waiters_pending) {
3063 buffer->irq_work.waiters_pending = false;
3064
3065 irq_work_queue(&buffer->irq_work.work);
3066 }
3067
3068 if (cpu_buffer->irq_work.waiters_pending) {
3069 cpu_buffer->irq_work.waiters_pending = false;
3070
3071 irq_work_queue(&cpu_buffer->irq_work.work);
3072 }
3073
3074 if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
3075 return;
3076
3077 if (cpu_buffer->reader_page == cpu_buffer->commit_page)
3078 return;
3079
3080 if (!cpu_buffer->irq_work.full_waiters_pending)
3081 return;
3082
3083 cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
3084
3085 full = cpu_buffer->shortest_full;
3086 nr_pages = cpu_buffer->nr_pages;
3087 dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
3088 if (full && nr_pages && (dirty * 100) <= full * nr_pages)
3089 return;
3090
3091 cpu_buffer->irq_work.wakeup_full = true;
3092 cpu_buffer->irq_work.full_waiters_pending = false;
3093
3094 irq_work_queue(&cpu_buffer->irq_work.work);
3095}
3096
3097#ifdef CONFIG_RING_BUFFER_RECORD_RECURSION
3098# define do_ring_buffer_record_recursion() \
3099 do_ftrace_record_recursion(_THIS_IP_, _RET_IP_)
3100#else
3101# define do_ring_buffer_record_recursion() do { } while (0)
3102#endif
3103
/*
 * The ring buffer can be written to from several contexts on the same
 * CPU: normal (task), softirq, irq and NMI. A writer in one context
 * may interrupt a writer in another, but a context must never recurse
 * into itself, otherwise it could corrupt the event it was in the
 * middle of writing.
 *
 * trace_recursive_lock() gives each context its own bit in
 * cpu_buffer->current_context. The bit for the current context is set
 * for the duration of the write and cleared again by
 * trace_recursive_unlock(). If the bit is already set, the write is
 * normally rejected as recursion.
 *
 * One exception is allowed: an interrupt can arrive in the small
 * window where the context has changed but the preempt count does not
 * reflect it yet, which makes two different writers look like the
 * same context. The extra RB_CTX_TRANSITION bit absorbs that single
 * case; only when the transition bit is also taken is the write
 * dropped and recorded via do_ring_buffer_record_recursion().
 *
 * cpu_buffer->nest shifts the whole bit set, so that
 * ring_buffer_nest_start() can open an independent group of context
 * bits for intentionally nested writes.
 */
3166static __always_inline int
3167trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
3168{
3169 unsigned int val = cpu_buffer->current_context;
3170 int bit = interrupt_context_level();
3171
3172 bit = RB_CTX_NORMAL - bit;
3173
3174 if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
3175
3176
3177
3178
3179
3180 bit = RB_CTX_TRANSITION;
3181 if (val & (1 << (bit + cpu_buffer->nest))) {
3182 do_ring_buffer_record_recursion();
3183 return 1;
3184 }
3185 }
3186
3187 val |= (1 << (bit + cpu_buffer->nest));
3188 cpu_buffer->current_context = val;
3189
3190 return 0;
3191}
3192
3193static __always_inline void
3194trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
3195{
3196 cpu_buffer->current_context &=
3197 cpu_buffer->current_context - (1 << cpu_buffer->nest);
3198}
3199
3200
/* The recursion checking above uses up to 5 context bits per nesting level */
#define NESTED_BITS 5

/**
 * ring_buffer_nest_start - Allow to trace while nested
 * @buffer: The ring buffer to modify
 *
 * The ring buffer has a safety mechanism to prevent recursion (see
 * trace_recursive_lock() above). But there may be a case where a
 * trace needs to be done while tracing something else, for example
 * writing to another ring buffer from within a ring buffer callback.
 * Calling this shifts the recursion context by NESTED_BITS so that
 * the nested write is not flagged as recursion.
 *
 * Preemption is disabled here and stays disabled until the matching
 * ring_buffer_nest_end().
 */
3216void ring_buffer_nest_start(struct trace_buffer *buffer)
3217{
3218 struct ring_buffer_per_cpu *cpu_buffer;
3219 int cpu;
3220
3221
3222 preempt_disable_notrace();
3223 cpu = raw_smp_processor_id();
3224 cpu_buffer = buffer->buffers[cpu];
3225
3226 cpu_buffer->nest += NESTED_BITS;
3227}
3228
/**
 * ring_buffer_nest_end - End the nested tracing section
 * @buffer: The ring buffer to modify
 *
 * Must pair with a preceding ring_buffer_nest_start(). It undoes the
 * recursion context shift and re-enables preemption.
 */
3236void ring_buffer_nest_end(struct trace_buffer *buffer)
3237{
3238 struct ring_buffer_per_cpu *cpu_buffer;
3239 int cpu;
3240
3241
3242 cpu = raw_smp_processor_id();
3243 cpu_buffer = buffer->buffers[cpu];
3244
3245 cpu_buffer->nest -= NESTED_BITS;
3246 preempt_enable_notrace();
3247}
3248
/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer the event was reserved from
 * @event: The event returned by ring_buffer_lock_reserve()
 *
 * This commits the reserved event, wakes up any readers waiting for
 * data when needed, and then drops the recursion protection and the
 * preemption disabling taken by ring_buffer_lock_reserve().
 */
3258int ring_buffer_unlock_commit(struct trace_buffer *buffer,
3259 struct ring_buffer_event *event)
3260{
3261 struct ring_buffer_per_cpu *cpu_buffer;
3262 int cpu = raw_smp_processor_id();
3263
3264 cpu_buffer = buffer->buffers[cpu];
3265
3266 rb_commit(cpu_buffer, event);
3267
3268 rb_wakeups(buffer, cpu_buffer);
3269
3270 trace_recursive_unlock(cpu_buffer);
3271
3272 preempt_enable_notrace();
3273
3274 return 0;
3275}
3276EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
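
/*
 * Illustrative sketch (not part of the ring buffer itself) of the
 * reserve/commit pattern the two calls above implement; the
 * "my_entry" structure and example_write() name are assumptions made
 * only for this example:
 *
 *	struct my_entry { int cpu; u64 value; };
 *
 *	static void example_write(struct trace_buffer *buffer, u64 value)
 *	{
 *		struct ring_buffer_event *event;
 *		struct my_entry *entry;
 *
 *		event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
 *		if (!event)
 *			return;		// buffer off, too large, or recursion
 *		entry = ring_buffer_event_data(event);
 *		entry->cpu = smp_processor_id();
 *		entry->value = value;
 *		ring_buffer_unlock_commit(buffer, event);
 *	}
 *
 * The reserve disables preemption, so the commit must run on the same
 * CPU and in the same context as the reserve.
 */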
3277
3278
3279#define CHECK_FULL_PAGE 1L
3280
3281#ifdef CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS
3282static void dump_buffer_page(struct buffer_data_page *bpage,
3283 struct rb_event_info *info,
3284 unsigned long tail)
3285{
3286 struct ring_buffer_event *event;
3287 u64 ts, delta;
3288 int e;
3289
3290 ts = bpage->time_stamp;
3291 pr_warn(" [%lld] PAGE TIME STAMP\n", ts);
3292
3293 for (e = 0; e < tail; e += rb_event_length(event)) {
3294
3295 event = (struct ring_buffer_event *)(bpage->data + e);
3296
3297 switch (event->type_len) {
3298
3299 case RINGBUF_TYPE_TIME_EXTEND:
3300 delta = rb_event_time_stamp(event);
3301 ts += delta;
3302 pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta);
3303 break;
3304
3305 case RINGBUF_TYPE_TIME_STAMP:
3306 delta = rb_event_time_stamp(event);
3307 ts = delta;
3308 pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta);
3309 break;
3310
3311 case RINGBUF_TYPE_PADDING:
3312 ts += event->time_delta;
3313 pr_warn(" [%lld] delta:%d PADDING\n", ts, event->time_delta);
3314 break;
3315
3316 case RINGBUF_TYPE_DATA:
3317 ts += event->time_delta;
3318 pr_warn(" [%lld] delta:%d\n", ts, event->time_delta);
3319 break;
3320
3321 default:
3322 break;
3323 }
3324 }
3325}
3326
3327static DEFINE_PER_CPU(atomic_t, checking);
3328static atomic_t ts_dump;
3329
/*
 * Walk the events on the tail page and verify that their deltas add
 * up to the timestamp being committed. Only one CPU dumps a page at
 * a time (ts_dump), and recursive checks on the same CPU are skipped
 * via the per-CPU "checking" counter.
 */
3334static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
3335 struct rb_event_info *info,
3336 unsigned long tail)
3337{
3338 struct ring_buffer_event *event;
3339 struct buffer_data_page *bpage;
3340 u64 ts, delta;
3341 bool full = false;
3342 int e;
3343
3344 bpage = info->tail_page->page;
3345
3346 if (tail == CHECK_FULL_PAGE) {
3347 full = true;
3348 tail = local_read(&bpage->commit);
3349 } else if (info->add_timestamp &
3350 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) {
3351
3352 return;
3353 }
3354
3355
3356
3357
3358
3359 if (tail <= 8 || tail > local_read(&bpage->commit))
3360 return;
3361
3362
3363
3364
3365 if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
3366 goto out;
3367
3368 ts = bpage->time_stamp;
3369
3370 for (e = 0; e < tail; e += rb_event_length(event)) {
3371
3372 event = (struct ring_buffer_event *)(bpage->data + e);
3373
3374 switch (event->type_len) {
3375
3376 case RINGBUF_TYPE_TIME_EXTEND:
3377 delta = rb_event_time_stamp(event);
3378 ts += delta;
3379 break;
3380
3381 case RINGBUF_TYPE_TIME_STAMP:
3382 delta = rb_event_time_stamp(event);
3383 ts = delta;
3384 break;
3385
3386 case RINGBUF_TYPE_PADDING:
3387 if (event->time_delta == 1)
3388 break;
3389 fallthrough;
3390 case RINGBUF_TYPE_DATA:
3391 ts += event->time_delta;
3392 break;
3393
3394 default:
3395 RB_WARN_ON(cpu_buffer, 1);
3396 }
3397 }
3398 if ((full && ts > info->ts) ||
3399 (!full && ts + info->delta != info->ts)) {
3400
3401 if (atomic_inc_return(&ts_dump) != 1) {
3402 atomic_dec(&ts_dump);
3403 goto out;
3404 }
3405 atomic_inc(&cpu_buffer->record_disabled);
3406
3407 WARN_ON_ONCE(system_state != SYSTEM_BOOTING);
3408 pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n",
3409 cpu_buffer->cpu,
3410 ts + info->delta, info->ts, info->delta,
3411 info->before, info->after,
3412 full ? " (full)" : "");
3413 dump_buffer_page(bpage, info, tail);
3414 atomic_dec(&ts_dump);
3415
3416 return;
3417 }
3418out:
3419 atomic_dec(this_cpu_ptr(&checking));
3420}
3421#else
3422static inline void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
3423 struct rb_event_info *info,
3424 unsigned long tail)
3425{
3426}
3427#endif
3428
3429static struct ring_buffer_event *
3430__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
3431 struct rb_event_info *info)
3432{
3433 struct ring_buffer_event *event;
3434 struct buffer_page *tail_page;
3435 unsigned long tail, write, w;
3436 bool a_ok;
3437 bool b_ok;
3438
3439
3440 tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
3441
3442 w = local_read(&tail_page->write) & RB_WRITE_MASK;
3443 barrier();
3444 b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
3445 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3446 barrier();
3447 info->ts = rb_time_stamp(cpu_buffer->buffer);
3448
3449 if ((info->add_timestamp & RB_ADD_STAMP_ABSOLUTE)) {
3450 info->delta = info->ts;
3451 } else {
3452
3453
3454
3455
3456
3457 if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) {
3458 info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND;
3459 info->length += RB_LEN_TIME_EXTEND;
3460 } else {
3461 info->delta = info->ts - info->after;
3462 if (unlikely(test_time_stamp(info->delta))) {
3463 info->add_timestamp |= RB_ADD_STAMP_EXTEND;
3464 info->length += RB_LEN_TIME_EXTEND;
3465 }
3466 }
3467 }
3468
3469 rb_time_set(&cpu_buffer->before_stamp, info->ts);
3470
3471 write = local_add_return(info->length, &tail_page->write);
3472
3473
3474 write &= RB_WRITE_MASK;
3475
3476 tail = write - info->length;
3477
3478
3479 if (unlikely(write > BUF_PAGE_SIZE)) {
3480
3481 b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
3482 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3483 if (a_ok && b_ok && info->before != info->after)
3484 (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
3485 info->before, info->after);
3486 if (a_ok && b_ok)
3487 check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
3488 return rb_move_tail(cpu_buffer, tail, info);
3489 }
3490
3491 if (likely(tail == w)) {
3492 u64 save_before;
3493 bool s_ok;
3494
3495
3496 rb_time_set(&cpu_buffer->write_stamp, info->ts);
3497 barrier();
3498 s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before);
3499 RB_WARN_ON(cpu_buffer, !s_ok);
3500 if (likely(!(info->add_timestamp &
3501 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE))))
3502
3503 info->delta = info->ts - info->after;
3504 else
3505
3506 info->delta = info->ts;
3507 barrier();
3508 check_buffer(cpu_buffer, info, tail);
3509 if (unlikely(info->ts != save_before)) {
3510
3511
3512 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3513 RB_WARN_ON(cpu_buffer, !a_ok);
3514
3515
3516 if (save_before > info->after) {
3517
3518
3519
3520
3521 (void)rb_time_cmpxchg(&cpu_buffer->write_stamp,
3522 info->after, save_before);
3523 }
3524 }
3525 } else {
3526 u64 ts;
3527
3528 a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
3529
3530 RB_WARN_ON(cpu_buffer, !a_ok);
3531 ts = rb_time_stamp(cpu_buffer->buffer);
3532 barrier();
3533 if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
3534 info->after < ts &&
3535 rb_time_cmpxchg(&cpu_buffer->write_stamp,
3536 info->after, ts)) {
3537
3538 info->delta = ts - info->after;
3539 } else {
3540
3541
3542
3543
3544
3545
3546
3547
3548 info->delta = 0;
3549 }
3550 info->ts = ts;
3551 info->add_timestamp &= ~RB_ADD_STAMP_FORCE;
3552 }
3553
3554
3555
3556
3557
3558 if (unlikely(!tail && !(info->add_timestamp &
3559 (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE))))
3560 info->delta = 0;
3561
3562
3563
3564 event = __rb_page_index(tail_page, tail);
3565 rb_update_event(cpu_buffer, event, info);
3566
3567 local_inc(&tail_page->entries);
3568
3569
3570
3571
3572
3573 if (unlikely(!tail))
3574 tail_page->page->time_stamp = info->ts;
3575
3576
3577 local_add(info->length, &cpu_buffer->entries_bytes);
3578
3579 return event;
3580}
3581
3582static __always_inline struct ring_buffer_event *
3583rb_reserve_next_event(struct trace_buffer *buffer,
3584 struct ring_buffer_per_cpu *cpu_buffer,
3585 unsigned long length)
3586{
3587 struct ring_buffer_event *event;
3588 struct rb_event_info info;
3589 int nr_loops = 0;
3590 int add_ts_default;
3591
3592 rb_start_commit(cpu_buffer);
3593
3594
3595#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3596
3597
3598
3599
3600
3601
3602 barrier();
3603 if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
3604 local_dec(&cpu_buffer->committing);
3605 local_dec(&cpu_buffer->commits);
3606 return NULL;
3607 }
3608#endif
3609
3610 info.length = rb_calculate_event_length(length);
3611
3612 if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
3613 add_ts_default = RB_ADD_STAMP_ABSOLUTE;
3614 info.length += RB_LEN_TIME_EXTEND;
3615 } else {
3616 add_ts_default = RB_ADD_STAMP_NONE;
3617 }
3618
3619 again:
3620 info.add_timestamp = add_ts_default;
3621 info.delta = 0;
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
3633 goto out_fail;
3634
3635 event = __rb_reserve_next(cpu_buffer, &info);
3636
3637 if (unlikely(PTR_ERR(event) == -EAGAIN)) {
3638 if (info.add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND))
3639 info.length -= RB_LEN_TIME_EXTEND;
3640 goto again;
3641 }
3642
3643 if (likely(event))
3644 return event;
3645 out_fail:
3646 rb_end_commit(cpu_buffer);
3647 return NULL;
3648}
3649
/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding the event header)
 *
 * Reserve space on the ring buffer for the caller to write into
 * directly. On success the caller fills the data returned by
 * ring_buffer_event_data() and then calls either
 * ring_buffer_unlock_commit() or ring_buffer_discard_commit().
 *
 * Preemption stays disabled and the per-CPU recursion protection
 * stays held until that commit or discard, so both must happen in
 * the same context.
 *
 * Returns NULL if recording is disabled, @length is larger than
 * BUF_MAX_DATA_SIZE, the write would recurse, or the reserve fails.
 */
3665struct ring_buffer_event *
3666ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
3667{
3668 struct ring_buffer_per_cpu *cpu_buffer;
3669 struct ring_buffer_event *event;
3670 int cpu;
3671
3672
3673 preempt_disable_notrace();
3674
3675 if (unlikely(atomic_read(&buffer->record_disabled)))
3676 goto out;
3677
3678 cpu = raw_smp_processor_id();
3679
3680 if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
3681 goto out;
3682
3683 cpu_buffer = buffer->buffers[cpu];
3684
3685 if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
3686 goto out;
3687
3688 if (unlikely(length > BUF_MAX_DATA_SIZE))
3689 goto out;
3690
3691 if (unlikely(trace_recursive_lock(cpu_buffer)))
3692 goto out;
3693
3694 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3695 if (!event)
3696 goto out_unlock;
3697
3698 return event;
3699
3700 out_unlock:
3701 trace_recursive_unlock(cpu_buffer);
3702 out:
3703 preempt_enable_notrace();
3704 return NULL;
3705}
3706EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
3707
3708
3709
3710
3711
3712
3713
3714static inline void
3715rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
3716 struct ring_buffer_event *event)
3717{
3718 unsigned long addr = (unsigned long)event;
3719 struct buffer_page *bpage = cpu_buffer->commit_page;
3720 struct buffer_page *start;
3721
3722 addr &= PAGE_MASK;
3723
3724
3725 if (likely(bpage->page == (void *)addr)) {
3726 local_dec(&bpage->entries);
3727 return;
3728 }
3729
3730
3731
3732
3733
3734 rb_inc_page(&bpage);
3735 start = bpage;
3736 do {
3737 if (bpage->page == (void *)addr) {
3738 local_dec(&bpage->entries);
3739 return;
3740 }
3741 rb_inc_page(&bpage);
3742 } while (bpage != start);
3743
3744
3745 RB_WARN_ON(cpu_buffer, 1);
3746}
3747
/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: the non-committed event to discard
 *
 * Sometimes an event that has been reserved turns out not to be
 * wanted (for example a filter rejected it). This marks the event as
 * discarded and, when it is still the last event on the tail page,
 * tries to give its space back. The commit started by the reserve is
 * completed here and the recursion protection and preemption are
 * released, so use this instead of, not in addition to,
 * ring_buffer_unlock_commit().
 */
3767void ring_buffer_discard_commit(struct trace_buffer *buffer,
3768 struct ring_buffer_event *event)
3769{
3770 struct ring_buffer_per_cpu *cpu_buffer;
3771 int cpu;
3772
3773
3774 rb_event_discard(event);
3775
3776 cpu = smp_processor_id();
3777 cpu_buffer = buffer->buffers[cpu];
3778
	/*
	 * This must only be called while the commit from the matching
	 * reserve is still in progress (the event has not been
	 * committed yet), so preemption is still disabled here.
	 */
3784 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
3785
3786 rb_decrement_entry(cpu_buffer, event);
3787 if (rb_try_to_discard(cpu_buffer, event))
3788 goto out;
3789
3790 out:
3791 rb_end_commit(cpu_buffer);
3792
3793 trace_recursive_unlock(cpu_buffer);
3794
3795 preempt_enable_notrace();
3796
3797}
3798EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
3799
/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer
 *
 * This is like ring_buffer_lock_reserve() and
 * ring_buffer_unlock_commit() rolled into one: the data is copied
 * into the reserved event and committed in a single call.
 *
 * Returns 0 on success, or -EBUSY if the event could not be written.
 */
3813int ring_buffer_write(struct trace_buffer *buffer,
3814 unsigned long length,
3815 void *data)
3816{
3817 struct ring_buffer_per_cpu *cpu_buffer;
3818 struct ring_buffer_event *event;
3819 void *body;
3820 int ret = -EBUSY;
3821 int cpu;
3822
3823 preempt_disable_notrace();
3824
3825 if (atomic_read(&buffer->record_disabled))
3826 goto out;
3827
3828 cpu = raw_smp_processor_id();
3829
3830 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3831 goto out;
3832
3833 cpu_buffer = buffer->buffers[cpu];
3834
3835 if (atomic_read(&cpu_buffer->record_disabled))
3836 goto out;
3837
3838 if (length > BUF_MAX_DATA_SIZE)
3839 goto out;
3840
3841 if (unlikely(trace_recursive_lock(cpu_buffer)))
3842 goto out;
3843
3844 event = rb_reserve_next_event(buffer, cpu_buffer, length);
3845 if (!event)
3846 goto out_unlock;
3847
3848 body = rb_event_data(event);
3849
3850 memcpy(body, data, length);
3851
3852 rb_commit(cpu_buffer, event);
3853
3854 rb_wakeups(buffer, cpu_buffer);
3855
3856 ret = 0;
3857
3858 out_unlock:
3859 trace_recursive_unlock(cpu_buffer);
3860
3861 out:
3862 preempt_enable_notrace();
3863
3864 return ret;
3865}
3866EXPORT_SYMBOL_GPL(ring_buffer_write);
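
/*
 * Illustrative sketch (an example for documentation only, not code
 * used by this file) of the one-shot write path above, compared with
 * reserve/commit:
 *
 *	u32 raw[2] = { 1, 2 };
 *
 *	if (ring_buffer_write(buffer, sizeof(raw), raw))
 *		pr_debug("ring buffer write failed\n");
 *
 * ring_buffer_write() copies the data itself, so it is convenient
 * when the payload already exists; ring_buffer_lock_reserve() is
 * preferred when the caller wants to build the payload in place and
 * avoid the extra copy.
 */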
3867
3868static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
3869{
3870 struct buffer_page *reader = cpu_buffer->reader_page;
3871 struct buffer_page *head = rb_set_head_page(cpu_buffer);
3872 struct buffer_page *commit = cpu_buffer->commit_page;
3873
3874
3875 if (unlikely(!head))
3876 return true;
3877
3878
3879 if (reader->read != rb_page_commit(reader))
3880 return false;
3881
3882
3883
3884
3885
3886 if (commit == reader)
3887 return true;
3888
3889
3890
3891
3892
3893 if (commit != head)
3894 return false;
3895
3896
3897
3898
3899
3900
3901 return rb_page_commit(commit) == 0;
3902}
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913void ring_buffer_record_disable(struct trace_buffer *buffer)
3914{
3915 atomic_inc(&buffer->record_disabled);
3916}
3917EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
3918
3919
3920
3921
3922
3923
3924
3925
3926void ring_buffer_record_enable(struct trace_buffer *buffer)
3927{
3928 atomic_dec(&buffer->record_disabled);
3929}
3930EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943void ring_buffer_record_off(struct trace_buffer *buffer)
3944{
3945 unsigned int rd;
3946 unsigned int new_rd;
3947
3948 do {
3949 rd = atomic_read(&buffer->record_disabled);
3950 new_rd = rd | RB_BUFFER_OFF;
3951 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3952}
3953EXPORT_SYMBOL_GPL(ring_buffer_record_off);
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966void ring_buffer_record_on(struct trace_buffer *buffer)
3967{
3968 unsigned int rd;
3969 unsigned int new_rd;
3970
3971 do {
3972 rd = atomic_read(&buffer->record_disabled);
3973 new_rd = rd & ~RB_BUFFER_OFF;
3974 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
3975}
3976EXPORT_SYMBOL_GPL(ring_buffer_record_on);
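
/*
 * A short note on the two mechanisms above (kept here as an aid to
 * readers, not as kernel-doc): ring_buffer_record_disable() and
 * ring_buffer_record_enable() nest by incrementing and decrementing
 * the record_disabled counter, while ring_buffer_record_off() and
 * ring_buffer_record_on() set and clear the single RB_BUFFER_OFF bit,
 * so a permanent "off" survives any number of paired disable/enable
 * calls:
 *
 *	ring_buffer_record_off(buffer);
 *	ring_buffer_record_disable(buffer);
 *	ring_buffer_record_enable(buffer);
 *	// ring_buffer_record_is_on(buffer) is still false here
 */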
3977
3978
3979
3980
3981
3982
3983
3984bool ring_buffer_record_is_on(struct trace_buffer *buffer)
3985{
3986 return !atomic_read(&buffer->record_disabled);
3987}
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000bool ring_buffer_record_is_set_on(struct trace_buffer *buffer)
4001{
4002 return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
4003}
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu)
4016{
4017 struct ring_buffer_per_cpu *cpu_buffer;
4018
4019 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4020 return;
4021
4022 cpu_buffer = buffer->buffers[cpu];
4023 atomic_inc(&cpu_buffer->record_disabled);
4024}
4025EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu)
4036{
4037 struct ring_buffer_per_cpu *cpu_buffer;
4038
4039 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4040 return;
4041
4042 cpu_buffer = buffer->buffers[cpu];
4043 atomic_dec(&cpu_buffer->record_disabled);
4044}
4045EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
4046
4047
4048
4049
4050
4051
4052
4053static inline unsigned long
4054rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
4055{
4056 return local_read(&cpu_buffer->entries) -
4057 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
4058}
4059
4060
4061
4062
4063
4064
4065u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
4066{
4067 unsigned long flags;
4068 struct ring_buffer_per_cpu *cpu_buffer;
4069 struct buffer_page *bpage;
4070 u64 ret = 0;
4071
4072 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4073 return 0;
4074
4075 cpu_buffer = buffer->buffers[cpu];
4076 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4077
4078
4079
4080
4081 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
4082 bpage = cpu_buffer->reader_page;
4083 else
4084 bpage = rb_set_head_page(cpu_buffer);
4085 if (bpage)
4086 ret = bpage->page->time_stamp;
4087 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4088
4089 return ret;
4090}
4091EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
4092
4093
4094
4095
4096
4097
4098unsigned long ring_buffer_bytes_cpu(struct trace_buffer *buffer, int cpu)
4099{
4100 struct ring_buffer_per_cpu *cpu_buffer;
4101 unsigned long ret;
4102
4103 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4104 return 0;
4105
4106 cpu_buffer = buffer->buffers[cpu];
4107 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
4108
4109 return ret;
4110}
4111EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
4112
4113
4114
4115
4116
4117
4118unsigned long ring_buffer_entries_cpu(struct trace_buffer *buffer, int cpu)
4119{
4120 struct ring_buffer_per_cpu *cpu_buffer;
4121
4122 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4123 return 0;
4124
4125 cpu_buffer = buffer->buffers[cpu];
4126
4127 return rb_num_of_entries(cpu_buffer);
4128}
4129EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
4130
4131
4132
4133
4134
4135
4136
4137unsigned long ring_buffer_overrun_cpu(struct trace_buffer *buffer, int cpu)
4138{
4139 struct ring_buffer_per_cpu *cpu_buffer;
4140 unsigned long ret;
4141
4142 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4143 return 0;
4144
4145 cpu_buffer = buffer->buffers[cpu];
4146 ret = local_read(&cpu_buffer->overrun);
4147
4148 return ret;
4149}
4150EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
4151
4152
4153
4154
4155
4156
4157
4158
4159unsigned long
4160ring_buffer_commit_overrun_cpu(struct trace_buffer *buffer, int cpu)
4161{
4162 struct ring_buffer_per_cpu *cpu_buffer;
4163 unsigned long ret;
4164
4165 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4166 return 0;
4167
4168 cpu_buffer = buffer->buffers[cpu];
4169 ret = local_read(&cpu_buffer->commit_overrun);
4170
4171 return ret;
4172}
4173EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
4174
4175
4176
4177
4178
4179
4180
4181unsigned long
4182ring_buffer_dropped_events_cpu(struct trace_buffer *buffer, int cpu)
4183{
4184 struct ring_buffer_per_cpu *cpu_buffer;
4185 unsigned long ret;
4186
4187 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4188 return 0;
4189
4190 cpu_buffer = buffer->buffers[cpu];
4191 ret = local_read(&cpu_buffer->dropped_events);
4192
4193 return ret;
4194}
4195EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
4196
4197
4198
4199
4200
4201
4202unsigned long
4203ring_buffer_read_events_cpu(struct trace_buffer *buffer, int cpu)
4204{
4205 struct ring_buffer_per_cpu *cpu_buffer;
4206
4207 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4208 return 0;
4209
4210 cpu_buffer = buffer->buffers[cpu];
4211 return cpu_buffer->read;
4212}
4213EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
4214
4215
4216
4217
4218
4219
4220
4221
4222unsigned long ring_buffer_entries(struct trace_buffer *buffer)
4223{
4224 struct ring_buffer_per_cpu *cpu_buffer;
4225 unsigned long entries = 0;
4226 int cpu;
4227
4228
4229 for_each_buffer_cpu(buffer, cpu) {
4230 cpu_buffer = buffer->buffers[cpu];
4231 entries += rb_num_of_entries(cpu_buffer);
4232 }
4233
4234 return entries;
4235}
4236EXPORT_SYMBOL_GPL(ring_buffer_entries);
4237
4238
4239
4240
4241
4242
4243
4244
4245unsigned long ring_buffer_overruns(struct trace_buffer *buffer)
4246{
4247 struct ring_buffer_per_cpu *cpu_buffer;
4248 unsigned long overruns = 0;
4249 int cpu;
4250
4251
4252 for_each_buffer_cpu(buffer, cpu) {
4253 cpu_buffer = buffer->buffers[cpu];
4254 overruns += local_read(&cpu_buffer->overrun);
4255 }
4256
4257 return overruns;
4258}
4259EXPORT_SYMBOL_GPL(ring_buffer_overruns);
4260
4261static void rb_iter_reset(struct ring_buffer_iter *iter)
4262{
4263 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4264
4265
4266 iter->head_page = cpu_buffer->reader_page;
4267 iter->head = cpu_buffer->reader_page->read;
4268 iter->next_event = iter->head;
4269
4270 iter->cache_reader_page = iter->head_page;
4271 iter->cache_read = cpu_buffer->read;
4272
4273 if (iter->head) {
4274 iter->read_stamp = cpu_buffer->read_stamp;
4275 iter->page_stamp = cpu_buffer->reader_page->page->time_stamp;
4276 } else {
4277 iter->read_stamp = iter->head_page->page->time_stamp;
4278 iter->page_stamp = iter->read_stamp;
4279 }
4280}
4281
4282
4283
4284
4285
4286
4287
4288
4289void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
4290{
4291 struct ring_buffer_per_cpu *cpu_buffer;
4292 unsigned long flags;
4293
4294 if (!iter)
4295 return;
4296
4297 cpu_buffer = iter->cpu_buffer;
4298
4299 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4300 rb_iter_reset(iter);
4301 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4302}
4303EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
4304
4305
4306
4307
4308
4309int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
4310{
4311 struct ring_buffer_per_cpu *cpu_buffer;
4312 struct buffer_page *reader;
4313 struct buffer_page *head_page;
4314 struct buffer_page *commit_page;
4315 struct buffer_page *curr_commit_page;
4316 unsigned commit;
4317 u64 curr_commit_ts;
4318 u64 commit_ts;
4319
4320 cpu_buffer = iter->cpu_buffer;
4321 reader = cpu_buffer->reader_page;
4322 head_page = cpu_buffer->head_page;
4323 commit_page = cpu_buffer->commit_page;
4324 commit_ts = commit_page->page->time_stamp;
4325
4326
4327
4328
4329
4330
4331 smp_rmb();
4332 commit = rb_page_commit(commit_page);
4333
4334 smp_rmb();
4335
4336
4337 curr_commit_page = READ_ONCE(cpu_buffer->commit_page);
4338 curr_commit_ts = READ_ONCE(curr_commit_page->page->time_stamp);
4339
4340
4341 if (curr_commit_page != commit_page ||
4342 curr_commit_ts != commit_ts)
4343 return 0;
4344
4345
4346 return ((iter->head_page == commit_page && iter->head >= commit) ||
4347 (iter->head_page == reader && commit_page == head_page &&
4348 head_page->read == commit &&
4349 iter->head == rb_page_commit(cpu_buffer->reader_page)));
4350}
4351EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
4352
4353static void
4354rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
4355 struct ring_buffer_event *event)
4356{
4357 u64 delta;
4358
4359 switch (event->type_len) {
4360 case RINGBUF_TYPE_PADDING:
4361 return;
4362
4363 case RINGBUF_TYPE_TIME_EXTEND:
4364 delta = rb_event_time_stamp(event);
4365 cpu_buffer->read_stamp += delta;
4366 return;
4367
4368 case RINGBUF_TYPE_TIME_STAMP:
4369 delta = rb_event_time_stamp(event);
4370 cpu_buffer->read_stamp = delta;
4371 return;
4372
4373 case RINGBUF_TYPE_DATA:
4374 cpu_buffer->read_stamp += event->time_delta;
4375 return;
4376
4377 default:
4378 RB_WARN_ON(cpu_buffer, 1);
4379 }
4380 return;
4381}
4382
4383static void
4384rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
4385 struct ring_buffer_event *event)
4386{
4387 u64 delta;
4388
4389 switch (event->type_len) {
4390 case RINGBUF_TYPE_PADDING:
4391 return;
4392
4393 case RINGBUF_TYPE_TIME_EXTEND:
4394 delta = rb_event_time_stamp(event);
4395 iter->read_stamp += delta;
4396 return;
4397
4398 case RINGBUF_TYPE_TIME_STAMP:
4399 delta = rb_event_time_stamp(event);
4400 iter->read_stamp = delta;
4401 return;
4402
4403 case RINGBUF_TYPE_DATA:
4404 iter->read_stamp += event->time_delta;
4405 return;
4406
4407 default:
4408 RB_WARN_ON(iter->cpu_buffer, 1);
4409 }
4410 return;
4411}
4412
4413static struct buffer_page *
4414rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
4415{
4416 struct buffer_page *reader = NULL;
4417 unsigned long overwrite;
4418 unsigned long flags;
4419 int nr_loops = 0;
4420 int ret;
4421
4422 local_irq_save(flags);
4423 arch_spin_lock(&cpu_buffer->lock);
4424
4425 again:
4426
4427
4428
4429
4430
4431
4432 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
4433 reader = NULL;
4434 goto out;
4435 }
4436
4437 reader = cpu_buffer->reader_page;
4438
4439
4440 if (cpu_buffer->reader_page->read < rb_page_size(reader))
4441 goto out;
4442
4443
4444 if (RB_WARN_ON(cpu_buffer,
4445 cpu_buffer->reader_page->read > rb_page_size(reader)))
4446 goto out;
4447
4448
4449 reader = NULL;
4450 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
4451 goto out;
4452
4453
4454 if (rb_num_of_entries(cpu_buffer) == 0)
4455 goto out;
4456
4457
4458
4459
4460 local_set(&cpu_buffer->reader_page->write, 0);
4461 local_set(&cpu_buffer->reader_page->entries, 0);
4462 local_set(&cpu_buffer->reader_page->page->commit, 0);
4463 cpu_buffer->reader_page->real_end = 0;
4464
4465 spin:
4466
4467
4468
4469 reader = rb_set_head_page(cpu_buffer);
4470 if (!reader)
4471 goto out;
4472 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
4473 cpu_buffer->reader_page->list.prev = reader->list.prev;
4474
4475
4476
4477
4478
4479
4480 cpu_buffer->pages = reader->list.prev;
4481
4482
4483 rb_set_list_to_head(&cpu_buffer->reader_page->list);
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494 smp_mb();
4495 overwrite = local_read(&(cpu_buffer->overrun));
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
4509
4510
4511
4512
4513 if (!ret)
4514 goto spin;
4515
4516
4517
4518
4519
4520
4521 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
4522 rb_inc_page(&cpu_buffer->head_page);
4523
4524 local_inc(&cpu_buffer->pages_read);
4525
4526
4527 cpu_buffer->reader_page = reader;
4528 cpu_buffer->reader_page->read = 0;
4529
4530 if (overwrite != cpu_buffer->last_overrun) {
4531 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
4532 cpu_buffer->last_overrun = overwrite;
4533 }
4534
4535 goto again;
4536
4537 out:
4538
4539 if (reader && reader->read == 0)
4540 cpu_buffer->read_stamp = reader->page->time_stamp;
4541
4542 arch_spin_unlock(&cpu_buffer->lock);
4543 local_irq_restore(flags);
4544
4545 return reader;
4546}
4547
4548static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
4549{
4550 struct ring_buffer_event *event;
4551 struct buffer_page *reader;
4552 unsigned length;
4553
4554 reader = rb_get_reader_page(cpu_buffer);
4555
4556
4557 if (RB_WARN_ON(cpu_buffer, !reader))
4558 return;
4559
4560 event = rb_reader_event(cpu_buffer);
4561
4562 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
4563 cpu_buffer->read++;
4564
4565 rb_update_read_stamp(cpu_buffer, event);
4566
4567 length = rb_event_length(event);
4568 cpu_buffer->reader_page->read += length;
4569}
4570
4571static void rb_advance_iter(struct ring_buffer_iter *iter)
4572{
4573 struct ring_buffer_per_cpu *cpu_buffer;
4574
4575 cpu_buffer = iter->cpu_buffer;
4576
4577
4578 if (iter->head == iter->next_event) {
4579
4580 if (rb_iter_head_event(iter) == NULL)
4581 return;
4582 }
4583
4584 iter->head = iter->next_event;
4585
4586
4587
4588
4589 if (iter->next_event >= rb_page_size(iter->head_page)) {
4590
4591 if (iter->head_page == cpu_buffer->commit_page)
4592 return;
4593 rb_inc_iter(iter);
4594 return;
4595 }
4596
4597 rb_update_iter_read_stamp(iter, iter->event);
4598}
4599
4600static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
4601{
4602 return cpu_buffer->lost_events;
4603}
4604
4605static struct ring_buffer_event *
4606rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
4607 unsigned long *lost_events)
4608{
4609 struct ring_buffer_event *event;
4610 struct buffer_page *reader;
4611 int nr_loops = 0;
4612
4613 if (ts)
4614 *ts = 0;
4615 again:
4616
4617
4618
4619
4620
4621
4622 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
4623 return NULL;
4624
4625 reader = rb_get_reader_page(cpu_buffer);
4626 if (!reader)
4627 return NULL;
4628
4629 event = rb_reader_event(cpu_buffer);
4630
4631 switch (event->type_len) {
4632 case RINGBUF_TYPE_PADDING:
4633 if (rb_null_event(event))
4634 RB_WARN_ON(cpu_buffer, 1);
4635
4636
4637
4638
4639
4640
4641
4642
4643 return event;
4644
4645 case RINGBUF_TYPE_TIME_EXTEND:
4646
4647 rb_advance_reader(cpu_buffer);
4648 goto again;
4649
4650 case RINGBUF_TYPE_TIME_STAMP:
4651 if (ts) {
4652 *ts = rb_event_time_stamp(event);
4653 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4654 cpu_buffer->cpu, ts);
4655 }
4656
4657 rb_advance_reader(cpu_buffer);
4658 goto again;
4659
4660 case RINGBUF_TYPE_DATA:
4661 if (ts && !(*ts)) {
4662 *ts = cpu_buffer->read_stamp + event->time_delta;
4663 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4664 cpu_buffer->cpu, ts);
4665 }
4666 if (lost_events)
4667 *lost_events = rb_lost_events(cpu_buffer);
4668 return event;
4669
4670 default:
4671 RB_WARN_ON(cpu_buffer, 1);
4672 }
4673
4674 return NULL;
4675}
4676EXPORT_SYMBOL_GPL(ring_buffer_peek);
4677
4678static struct ring_buffer_event *
4679rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4680{
4681 struct trace_buffer *buffer;
4682 struct ring_buffer_per_cpu *cpu_buffer;
4683 struct ring_buffer_event *event;
4684 int nr_loops = 0;
4685
4686 if (ts)
4687 *ts = 0;
4688
4689 cpu_buffer = iter->cpu_buffer;
4690 buffer = cpu_buffer->buffer;
4691
4692
4693
4694
4695
4696
4697 if (unlikely(iter->cache_read != cpu_buffer->read ||
4698 iter->cache_reader_page != cpu_buffer->reader_page))
4699 rb_iter_reset(iter);
4700
4701 again:
4702 if (ring_buffer_iter_empty(iter))
4703 return NULL;
4704
4705
4706
4707
4708
4709
4710
4711
4712 if (++nr_loops > 3)
4713 return NULL;
4714
4715 if (rb_per_cpu_empty(cpu_buffer))
4716 return NULL;
4717
4718 if (iter->head >= rb_page_size(iter->head_page)) {
4719 rb_inc_iter(iter);
4720 goto again;
4721 }
4722
4723 event = rb_iter_head_event(iter);
4724 if (!event)
4725 goto again;
4726
4727 switch (event->type_len) {
4728 case RINGBUF_TYPE_PADDING:
4729 if (rb_null_event(event)) {
4730 rb_inc_iter(iter);
4731 goto again;
4732 }
4733 rb_advance_iter(iter);
4734 return event;
4735
4736 case RINGBUF_TYPE_TIME_EXTEND:
4737
4738 rb_advance_iter(iter);
4739 goto again;
4740
4741 case RINGBUF_TYPE_TIME_STAMP:
4742 if (ts) {
4743 *ts = rb_event_time_stamp(event);
4744 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
4745 cpu_buffer->cpu, ts);
4746 }
4747
4748 rb_advance_iter(iter);
4749 goto again;
4750
4751 case RINGBUF_TYPE_DATA:
4752 if (ts && !(*ts)) {
4753 *ts = iter->read_stamp + event->time_delta;
4754 ring_buffer_normalize_time_stamp(buffer,
4755 cpu_buffer->cpu, ts);
4756 }
4757 return event;
4758
4759 default:
4760 RB_WARN_ON(cpu_buffer, 1);
4761 }
4762
4763 return NULL;
4764}
4765EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
4766
4767static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
4768{
4769 if (likely(!in_nmi())) {
4770 raw_spin_lock(&cpu_buffer->reader_lock);
4771 return true;
4772 }
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783 if (raw_spin_trylock(&cpu_buffer->reader_lock))
4784 return true;
4785
4786
4787 atomic_inc(&cpu_buffer->record_disabled);
4788 return false;
4789}
4790
4791static inline void
4792rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
4793{
4794 if (likely(locked))
4795 raw_spin_unlock(&cpu_buffer->reader_lock);
4796 return;
4797}
4798
/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
4809struct ring_buffer_event *
4810ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts,
4811 unsigned long *lost_events)
4812{
4813 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4814 struct ring_buffer_event *event;
4815 unsigned long flags;
4816 bool dolock;
4817
4818 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4819 return NULL;
4820
4821 again:
4822 local_irq_save(flags);
4823 dolock = rb_reader_lock(cpu_buffer);
4824 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4825 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4826 rb_advance_reader(cpu_buffer);
4827 rb_reader_unlock(cpu_buffer, dolock);
4828 local_irq_restore(flags);
4829
4830 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4831 goto again;
4832
4833 return event;
4834}
4835
4836
4837
4838
4839
4840
4841bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter)
4842{
4843 bool ret = iter->missed_events != 0;
4844
4845 iter->missed_events = 0;
4846 return ret;
4847}
4848EXPORT_SYMBOL_GPL(ring_buffer_iter_dropped);
4849
/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event (may be NULL)
 *
 * This will return the event that the iterator points to, but does
 * not advance the iterator.
 */
4858struct ring_buffer_event *
4859ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4860{
4861 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
4862 struct ring_buffer_event *event;
4863 unsigned long flags;
4864
4865 again:
4866 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4867 event = rb_iter_peek(iter, ts);
4868 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4869
4870 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4871 goto again;
4872
4873 return event;
4874}
4875
/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is
 * consumed. Meaning, that sequential reads will keep returning a
 * different event, and eventually empty the ring buffer if the
 * producer is slower than the reader, or stopped.
 */
4887struct ring_buffer_event *
4888ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
4889 unsigned long *lost_events)
4890{
4891 struct ring_buffer_per_cpu *cpu_buffer;
4892 struct ring_buffer_event *event = NULL;
4893 unsigned long flags;
4894 bool dolock;
4895
4896 again:
4897
4898 preempt_disable();
4899
4900 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4901 goto out;
4902
4903 cpu_buffer = buffer->buffers[cpu];
4904 local_irq_save(flags);
4905 dolock = rb_reader_lock(cpu_buffer);
4906
4907 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
4908 if (event) {
4909 cpu_buffer->lost_events = 0;
4910 rb_advance_reader(cpu_buffer);
4911 }
4912
4913 rb_reader_unlock(cpu_buffer, dolock);
4914 local_irq_restore(flags);
4915
4916 out:
4917 preempt_enable();
4918
4919 if (event && event->type_len == RINGBUF_TYPE_PADDING)
4920 goto again;
4921
4922 return event;
4923}
4924EXPORT_SYMBOL_GPL(ring_buffer_consume);
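
/*
 * Illustrative consumer loop (an example sketch, not an API used by
 * this file) built on ring_buffer_consume() above; handle_record() is
 * a placeholder name for whatever the reader does with the payload:
 *
 *	struct ring_buffer_event *event;
 *	unsigned long lost;
 *	u64 ts;
 *	int cpu;
 *
 *	for_each_online_cpu(cpu) {
 *		while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
 *			void *data = ring_buffer_event_data(event);
 *			unsigned int len = ring_buffer_event_length(event);
 *
 *			handle_record(cpu, ts, data, len, lost);
 *		}
 *	}
 *
 * ring_buffer_event_data() and ring_buffer_event_length() are the
 * same accessors used by the startup test at the end of this file.
 */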
4925
/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 * @flags: gfp flags to use for memory allocation
 *
 * This performs the initial preparations for iterating the buffer
 * without consuming it: the iterator and its scratch event are
 * allocated and resizing of the cpu buffer is disabled. The actual
 * reading must not start until after ring_buffer_read_prepare_sync()
 * and ring_buffer_read_start() have been called.
 *
 * Returns NULL if @cpu is not part of the buffer or the allocation
 * fails.
 */
4947struct ring_buffer_iter *
4948ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
4949{
4950 struct ring_buffer_per_cpu *cpu_buffer;
4951 struct ring_buffer_iter *iter;
4952
4953 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4954 return NULL;
4955
4956 iter = kzalloc(sizeof(*iter), flags);
4957 if (!iter)
4958 return NULL;
4959
4960 iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
4961 if (!iter->event) {
4962 kfree(iter);
4963 return NULL;
4964 }
4965
4966 cpu_buffer = buffer->buffers[cpu];
4967
4968 iter->cpu_buffer = cpu_buffer;
4969
4970 atomic_inc(&cpu_buffer->resize_disabled);
4971
4972 return iter;
4973}
4974EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
4975
4976
4977
4978
4979
4980
4981
4982
4983void
4984ring_buffer_read_prepare_sync(void)
4985{
4986 synchronize_rcu();
4987}
4988EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
4989
/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @iter: The iterator returned by ring_buffer_read_prepare()
 *
 * This finalizes the startup of an iteration through the buffer.
 * Must be called after ring_buffer_read_prepare_sync(); the iterator
 * is reset to the current reader position under the reader lock.
 */
5001void
5002ring_buffer_read_start(struct ring_buffer_iter *iter)
5003{
5004 struct ring_buffer_per_cpu *cpu_buffer;
5005 unsigned long flags;
5006
5007 if (!iter)
5008 return;
5009
5010 cpu_buffer = iter->cpu_buffer;
5011
5012 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5013 arch_spin_lock(&cpu_buffer->lock);
5014 rb_iter_reset(iter);
5015 arch_spin_unlock(&cpu_buffer->lock);
5016 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5017}
5018EXPORT_SYMBOL_GPL(ring_buffer_read_start);
5019
/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_read_prepare()
 *
 * This checks the integrity of the buffer pages, re-enables resizing
 * of the ring buffer, and frees the iterator.
 */
5027void
5028ring_buffer_read_finish(struct ring_buffer_iter *iter)
5029{
5030 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
5031 unsigned long flags;
5032
5033
5034
5035
5036
5037
5038
5039 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5040 rb_check_pages(cpu_buffer);
5041 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5042
5043 atomic_dec(&cpu_buffer->resize_disabled);
5044 kfree(iter->event);
5045 kfree(iter);
5046}
5047EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
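
/*
 * Illustrative sketch of a full non-consuming iteration using the
 * prepare/start/finish calls above; names other than the ring buffer
 * API (inspect_event(), the local variables) are assumptions made for
 * this example:
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_prepare(buffer, cpu, GFP_KERNEL);
 *	if (!iter)
 *		return;
 *	ring_buffer_read_prepare_sync();
 *	ring_buffer_read_start(iter);
 *
 *	while ((event = ring_buffer_iter_peek(iter, &ts))) {
 *		inspect_event(event, ts);
 *		ring_buffer_iter_advance(iter);
 *	}
 *
 *	ring_buffer_read_finish(iter);
 *
 * Unlike ring_buffer_consume(), nothing is removed from the buffer;
 * only resizing is blocked for the duration of the iteration.
 */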
5048
5049
5050
5051
5052
5053
5054
5055
5056void ring_buffer_iter_advance(struct ring_buffer_iter *iter)
5057{
5058 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
5059 unsigned long flags;
5060
5061 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5062
5063 rb_advance_iter(iter);
5064
5065 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5066}
5067EXPORT_SYMBOL_GPL(ring_buffer_iter_advance);
5068
5069
5070
5071
5072
5073
5074unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
5075{
5076
5077
5078
5079
5080
5081
5082 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5083 return 0;
5084
5085 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
5086}
5087EXPORT_SYMBOL_GPL(ring_buffer_size);
5088
5089static void
5090rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
5091{
5092 rb_head_page_deactivate(cpu_buffer);
5093
5094 cpu_buffer->head_page
5095 = list_entry(cpu_buffer->pages, struct buffer_page, list);
5096 local_set(&cpu_buffer->head_page->write, 0);
5097 local_set(&cpu_buffer->head_page->entries, 0);
5098 local_set(&cpu_buffer->head_page->page->commit, 0);
5099
5100 cpu_buffer->head_page->read = 0;
5101
5102 cpu_buffer->tail_page = cpu_buffer->head_page;
5103 cpu_buffer->commit_page = cpu_buffer->head_page;
5104
5105 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
5106 INIT_LIST_HEAD(&cpu_buffer->new_pages);
5107 local_set(&cpu_buffer->reader_page->write, 0);
5108 local_set(&cpu_buffer->reader_page->entries, 0);
5109 local_set(&cpu_buffer->reader_page->page->commit, 0);
5110 cpu_buffer->reader_page->read = 0;
5111
5112 local_set(&cpu_buffer->entries_bytes, 0);
5113 local_set(&cpu_buffer->overrun, 0);
5114 local_set(&cpu_buffer->commit_overrun, 0);
5115 local_set(&cpu_buffer->dropped_events, 0);
5116 local_set(&cpu_buffer->entries, 0);
5117 local_set(&cpu_buffer->committing, 0);
5118 local_set(&cpu_buffer->commits, 0);
5119 local_set(&cpu_buffer->pages_touched, 0);
5120 local_set(&cpu_buffer->pages_read, 0);
5121 cpu_buffer->last_pages_touch = 0;
5122 cpu_buffer->shortest_full = 0;
5123 cpu_buffer->read = 0;
5124 cpu_buffer->read_bytes = 0;
5125
5126 rb_time_set(&cpu_buffer->write_stamp, 0);
5127 rb_time_set(&cpu_buffer->before_stamp, 0);
5128
5129 memset(cpu_buffer->event_stamp, 0, sizeof(cpu_buffer->event_stamp));
5130
5131 cpu_buffer->lost_events = 0;
5132 cpu_buffer->last_overrun = 0;
5133
5134 rb_head_page_activate(cpu_buffer);
5135}
5136
5137
5138static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
5139{
5140 unsigned long flags;
5141
5142 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5143
5144 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
5145 goto out;
5146
5147 arch_spin_lock(&cpu_buffer->lock);
5148
5149 rb_reset_cpu(cpu_buffer);
5150
5151 arch_spin_unlock(&cpu_buffer->lock);
5152
5153 out:
5154 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5155}
5156
5157
5158
5159
5160
5161
5162void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
5163{
5164 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
5165
5166 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5167 return;
5168
5169
5170 mutex_lock(&buffer->mutex);
5171
5172 atomic_inc(&cpu_buffer->resize_disabled);
5173 atomic_inc(&cpu_buffer->record_disabled);
5174
5175
5176 synchronize_rcu();
5177
5178 reset_disabled_cpu_buffer(cpu_buffer);
5179
5180 atomic_dec(&cpu_buffer->record_disabled);
5181 atomic_dec(&cpu_buffer->resize_disabled);
5182
5183 mutex_unlock(&buffer->mutex);
5184}
5185EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
5186
5187
5188
5189
5190
5191
5192void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
5193{
5194 struct ring_buffer_per_cpu *cpu_buffer;
5195 int cpu;
5196
5197
5198 mutex_lock(&buffer->mutex);
5199
5200 for_each_online_buffer_cpu(buffer, cpu) {
5201 cpu_buffer = buffer->buffers[cpu];
5202
5203 atomic_inc(&cpu_buffer->resize_disabled);
5204 atomic_inc(&cpu_buffer->record_disabled);
5205 }
5206
5207
5208 synchronize_rcu();
5209
5210 for_each_online_buffer_cpu(buffer, cpu) {
5211 cpu_buffer = buffer->buffers[cpu];
5212
5213 reset_disabled_cpu_buffer(cpu_buffer);
5214
5215 atomic_dec(&cpu_buffer->record_disabled);
5216 atomic_dec(&cpu_buffer->resize_disabled);
5217 }
5218
5219 mutex_unlock(&buffer->mutex);
5220}
5221
5222
5223
5224
5225
5226void ring_buffer_reset(struct trace_buffer *buffer)
5227{
5228 struct ring_buffer_per_cpu *cpu_buffer;
5229 int cpu;
5230
5231
5232 mutex_lock(&buffer->mutex);
5233
5234 for_each_buffer_cpu(buffer, cpu) {
5235 cpu_buffer = buffer->buffers[cpu];
5236
5237 atomic_inc(&cpu_buffer->resize_disabled);
5238 atomic_inc(&cpu_buffer->record_disabled);
5239 }
5240
5241
5242 synchronize_rcu();
5243
5244 for_each_buffer_cpu(buffer, cpu) {
5245 cpu_buffer = buffer->buffers[cpu];
5246
5247 reset_disabled_cpu_buffer(cpu_buffer);
5248
5249 atomic_dec(&cpu_buffer->record_disabled);
5250 atomic_dec(&cpu_buffer->resize_disabled);
5251 }
5252
5253 mutex_unlock(&buffer->mutex);
5254}
5255EXPORT_SYMBOL_GPL(ring_buffer_reset);
5256
5257
5258
5259
5260
5261bool ring_buffer_empty(struct trace_buffer *buffer)
5262{
5263 struct ring_buffer_per_cpu *cpu_buffer;
5264 unsigned long flags;
5265 bool dolock;
5266 int cpu;
5267 int ret;
5268
5269
5270 for_each_buffer_cpu(buffer, cpu) {
5271 cpu_buffer = buffer->buffers[cpu];
5272 local_irq_save(flags);
5273 dolock = rb_reader_lock(cpu_buffer);
5274 ret = rb_per_cpu_empty(cpu_buffer);
5275 rb_reader_unlock(cpu_buffer, dolock);
5276 local_irq_restore(flags);
5277
5278 if (!ret)
5279 return false;
5280 }
5281
5282 return true;
5283}
5284EXPORT_SYMBOL_GPL(ring_buffer_empty);
5285
5286
5287
5288
5289
5290
5291bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
5292{
5293 struct ring_buffer_per_cpu *cpu_buffer;
5294 unsigned long flags;
5295 bool dolock;
5296 int ret;
5297
5298 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5299 return true;
5300
5301 cpu_buffer = buffer->buffers[cpu];
5302 local_irq_save(flags);
5303 dolock = rb_reader_lock(cpu_buffer);
5304 ret = rb_per_cpu_empty(cpu_buffer);
5305 rb_reader_unlock(cpu_buffer, dolock);
5306 local_irq_restore(flags);
5307
5308 return ret;
5309}
5310EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
5311
5312#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and have a spare buffer to swap it into.
 *
 * Returns 0 on success; -EINVAL if @cpu is not present in both
 * buffers or the buffers differ in size; -EAGAIN if recording is
 * disabled on either buffer; -EBUSY if a commit is in progress on
 * the swapped CPU buffers.
 */
5324int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
5325 struct trace_buffer *buffer_b, int cpu)
5326{
5327 struct ring_buffer_per_cpu *cpu_buffer_a;
5328 struct ring_buffer_per_cpu *cpu_buffer_b;
5329 int ret = -EINVAL;
5330
5331 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
5332 !cpumask_test_cpu(cpu, buffer_b->cpumask))
5333 goto out;
5334
5335 cpu_buffer_a = buffer_a->buffers[cpu];
5336 cpu_buffer_b = buffer_b->buffers[cpu];
5337
5338
5339 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
5340 goto out;
5341
5342 ret = -EAGAIN;
5343
5344 if (atomic_read(&buffer_a->record_disabled))
5345 goto out;
5346
5347 if (atomic_read(&buffer_b->record_disabled))
5348 goto out;
5349
5350 if (atomic_read(&cpu_buffer_a->record_disabled))
5351 goto out;
5352
5353 if (atomic_read(&cpu_buffer_b->record_disabled))
5354 goto out;
5355
5356
5357
5358
5359
5360
5361
5362 atomic_inc(&cpu_buffer_a->record_disabled);
5363 atomic_inc(&cpu_buffer_b->record_disabled);
5364
5365 ret = -EBUSY;
5366 if (local_read(&cpu_buffer_a->committing))
5367 goto out_dec;
5368 if (local_read(&cpu_buffer_b->committing))
5369 goto out_dec;
5370
5371 buffer_a->buffers[cpu] = cpu_buffer_b;
5372 buffer_b->buffers[cpu] = cpu_buffer_a;
5373
5374 cpu_buffer_b->buffer = buffer_a;
5375 cpu_buffer_a->buffer = buffer_b;
5376
5377 ret = 0;
5378
5379out_dec:
5380 atomic_dec(&cpu_buffer_a->record_disabled);
5381 atomic_dec(&cpu_buffer_b->record_disabled);
5382out:
5383 return ret;
5384}
5385EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
5386#endif
5387
/**
 * ring_buffer_alloc_read_page - allocate a page to read from the buffer
 * @buffer: the buffer to allocate for
 * @cpu: the cpu buffer the page will be used with
 *
 * Returns a buffer_data_page suitable for passing to
 * ring_buffer_read_page(). The cpu buffer's cached free page is
 * reused when available, otherwise a fresh page is allocated on
 * @cpu's node.
 *
 * Returns an ERR_PTR() on failure.
 */
5404void *ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
5405{
5406 struct ring_buffer_per_cpu *cpu_buffer;
5407 struct buffer_data_page *bpage = NULL;
5408 unsigned long flags;
5409 struct page *page;
5410
5411 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5412 return ERR_PTR(-ENODEV);
5413
5414 cpu_buffer = buffer->buffers[cpu];
5415 local_irq_save(flags);
5416 arch_spin_lock(&cpu_buffer->lock);
5417
5418 if (cpu_buffer->free_page) {
5419 bpage = cpu_buffer->free_page;
5420 cpu_buffer->free_page = NULL;
5421 }
5422
5423 arch_spin_unlock(&cpu_buffer->lock);
5424 local_irq_restore(flags);
5425
5426 if (bpage)
5427 goto out;
5428
5429 page = alloc_pages_node(cpu_to_node(cpu),
5430 GFP_KERNEL | __GFP_NORETRY, 0);
5431 if (!page)
5432 return ERR_PTR(-ENOMEM);
5433
5434 bpage = page_address(page);
5435
5436 out:
5437 rb_init_page(bpage);
5438
5439 return bpage;
5440}
5441EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
5442
/**
 * ring_buffer_free_read_page - free a page allocated by
 *				ring_buffer_alloc_read_page()
 * @buffer: the buffer the page came from
 * @cpu: the cpu buffer the page was allocated for
 * @data: the page to free
 *
 * The page is cached as the cpu buffer's free page when it is no
 * longer referenced elsewhere and the cache slot is empty; otherwise
 * it is freed outright.
 */
5451void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data)
5452{
5453 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
5454 struct buffer_data_page *bpage = data;
5455 struct page *page = virt_to_page(bpage);
5456 unsigned long flags;
5457
5458
5459 if (page_ref_count(page) > 1)
5460 goto out;
5461
5462 local_irq_save(flags);
5463 arch_spin_lock(&cpu_buffer->lock);
5464
5465 if (!cpu_buffer->free_page) {
5466 cpu_buffer->free_page = bpage;
5467 bpage = NULL;
5468 }
5469
5470 arch_spin_unlock(&cpu_buffer->lock);
5471 local_irq_restore(flags);
5472
5473 out:
5474 free_page((unsigned long)bpage);
5475}
5476EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
5477
/**
 * ring_buffer_read_page - extract a page of data from the buffer
 * @buffer: buffer to extract from
 * @data_page: the page allocated by ring_buffer_alloc_read_page()
 * @len: amount of data to read
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full
 *
 * This function pulls data out of the ring buffer and consumes it.
 * When the reader page has been fully written and @len allows it,
 * the reader page itself is swapped with @data_page, which is the
 * fast path. Otherwise (a partially read page, a short @len, or the
 * reader page still being written to) the events are copied into
 * @data_page one at a time, unless @full is set, in which case
 * nothing is transferred.
 *
 * If events were lost, their count is stored after the last event
 * when there is room and the RB_MISSED_STORED flag is set; the
 * RB_MISSED_EVENTS flag is set in the returned page's commit field
 * in any case.
 *
 * Returns the offset into @data_page of the first unread event
 * (>= 0) on success, or a negative value if nothing was transferred.
 */
5511int ring_buffer_read_page(struct trace_buffer *buffer,
5512 void **data_page, size_t len, int cpu, int full)
5513{
5514 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
5515 struct ring_buffer_event *event;
5516 struct buffer_data_page *bpage;
5517 struct buffer_page *reader;
5518 unsigned long missed_events;
5519 unsigned long flags;
5520 unsigned int commit;
5521 unsigned int read;
5522 u64 save_timestamp;
5523 int ret = -1;
5524
5525 if (!cpumask_test_cpu(cpu, buffer->cpumask))
5526 goto out;
5527
5528
5529
5530
5531
5532 if (len <= BUF_PAGE_HDR_SIZE)
5533 goto out;
5534
5535 len -= BUF_PAGE_HDR_SIZE;
5536
5537 if (!data_page)
5538 goto out;
5539
5540 bpage = *data_page;
5541 if (!bpage)
5542 goto out;
5543
5544 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5545
5546 reader = rb_get_reader_page(cpu_buffer);
5547 if (!reader)
5548 goto out_unlock;
5549
5550 event = rb_reader_event(cpu_buffer);
5551
5552 read = reader->read;
5553 commit = rb_page_commit(reader);
5554
5555
5556 missed_events = cpu_buffer->lost_events;
5557
5558
5559
5560
5561
5562
5563
5564
5565 if (read || (len < (commit - read)) ||
5566 cpu_buffer->reader_page == cpu_buffer->commit_page) {
5567 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
5568 unsigned int rpos = read;
5569 unsigned int pos = 0;
5570 unsigned int size;
5571
5572 if (full)
5573 goto out_unlock;
5574
5575 if (len > (commit - read))
5576 len = (commit - read);
5577
5578
5579 size = rb_event_ts_length(event);
5580
5581 if (len < size)
5582 goto out_unlock;
5583
5584
5585 save_timestamp = cpu_buffer->read_stamp;
5586
5587
5588 do {
5589
5590
5591
5592
5593
5594
5595 size = rb_event_length(event);
5596 memcpy(bpage->data + pos, rpage->data + rpos, size);
5597
5598 len -= size;
5599
5600 rb_advance_reader(cpu_buffer);
5601 rpos = reader->read;
5602 pos += size;
5603
5604 if (rpos >= commit)
5605 break;
5606
5607 event = rb_reader_event(cpu_buffer);
5608
5609 size = rb_event_ts_length(event);
5610 } while (len >= size);
5611
5612
5613 local_set(&bpage->commit, pos);
5614 bpage->time_stamp = save_timestamp;
5615
5616
5617 read = 0;
5618 } else {
5619
5620 cpu_buffer->read += rb_page_entries(reader);
5621 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
5622
5623
5624 rb_init_page(bpage);
5625 bpage = reader->page;
5626 reader->page = *data_page;
5627 local_set(&reader->write, 0);
5628 local_set(&reader->entries, 0);
5629 reader->read = 0;
5630 *data_page = bpage;
5631
5632
5633
5634
5635
5636
5637 if (reader->real_end)
5638 local_set(&bpage->commit, reader->real_end);
5639 }
5640 ret = read;
5641
5642 cpu_buffer->lost_events = 0;
5643
5644 commit = local_read(&bpage->commit);
5645
5646
5647
5648 if (missed_events) {
5649
5650
5651
5652 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
5653 memcpy(&bpage->data[commit], &missed_events,
5654 sizeof(missed_events));
5655 local_add(RB_MISSED_STORED, &bpage->commit);
5656 commit += sizeof(missed_events);
5657 }
5658 local_add(RB_MISSED_EVENTS, &bpage->commit);
5659 }
5660
5661
5662
5663
5664 if (commit < BUF_PAGE_SIZE)
5665 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
5666
5667 out_unlock:
5668 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5669
5670 out:
5671 return ret;
5672}
5673EXPORT_SYMBOL_GPL(ring_buffer_read_page);
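
/*
 * Illustrative sketch of the splice-style read path above; the
 * consume_page() name is an assumption made for this example:
 *
 *	void *page = ring_buffer_alloc_read_page(buffer, cpu);
 *	int ret;
 *
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *
 *	ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0);
 *	if (ret >= 0)
 *		consume_page(page, ret);	// ret is the offset of the first event
 *
 *	ring_buffer_free_read_page(buffer, cpu, page);
 *
 * Passing full == 1 makes ring_buffer_read_page() return a negative
 * value unless a whole page can be handed over, which is what per-CPU
 * splice readers want; full == 0 also copies out partially filled
 * reader pages.
 */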
5674
5675
5676
5677
5678
5679
5680int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
5681{
5682 struct trace_buffer *buffer;
5683 long nr_pages_same;
5684 int cpu_i;
5685 unsigned long nr_pages;
5686
5687 buffer = container_of(node, struct trace_buffer, node);
5688 if (cpumask_test_cpu(cpu, buffer->cpumask))
5689 return 0;
5690
5691 nr_pages = 0;
5692 nr_pages_same = 1;
5693
5694 for_each_buffer_cpu(buffer, cpu_i) {
5695
5696 if (nr_pages == 0)
5697 nr_pages = buffer->buffers[cpu_i]->nr_pages;
5698 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
5699 nr_pages_same = 0;
5700 break;
5701 }
5702 }
5703 /* allocate the minimum number of pages, the user can expand it later */
5704 if (!nr_pages_same)
5705 nr_pages = 2;
5706 buffer->buffers[cpu] =
5707 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
5708 if (!buffer->buffers[cpu]) {
5709 WARN(1, "failed to allocate ring buffer on CPU %u\n",
5710 cpu);
5711 return -ENOMEM;
5712 }
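 /* Make the new buffer fully visible before publishing the CPU in the cpumask */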
5713 smp_wmb();
5714 cpumask_set_cpu(cpu, buffer->cpumask);
5715 return 0;
5716}
5717
5718#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
5719 /*
5720  * This is a basic integrity check of the ring buffer.
5721  * Late in the boot cycle this test will run when configured in.
5722  * It will kick off a thread per CPU that will go into a loop
5723  * writing to the per cpu ring buffer various sizes of data.
5724  * Some of the data will be large items, some small.
5725  *
5726  * Another thread is created that goes into a spin, sending out
5727  * IPIs to the other CPUs to also write into the ring buffer.
5728  * This is to test the nesting ability of the buffer.
5729  *
5730  * Basic stats are recorded and reported. If something in the
5731  * ring buffer should happen that's not expected, a big warning
5732  * is displayed and all ring buffers are disabled.
5733  */
5734static struct task_struct *rb_threads[NR_CPUS] __initdata;
5735
5736struct rb_test_data {
5737 struct trace_buffer *buffer;
5738 unsigned long events;
5739 unsigned long bytes_written;
5740 unsigned long bytes_alloc;
5741 unsigned long bytes_dropped;
5742 unsigned long events_nested;
5743 unsigned long bytes_written_nested;
5744 unsigned long bytes_alloc_nested;
5745 unsigned long bytes_dropped_nested;
5746 int min_size_nested;
5747 int max_size_nested;
5748 int max_size;
5749 int min_size;
5750 int cpu;
5751 int cnt;
5752};
5753
5754static struct rb_test_data rb_data[NR_CPUS] __initdata;
5755
5756 /* 1 meg per cpu */
5757#define RB_TEST_BUFFER_SIZE 1048576
5758
5759static char rb_string[] __initdata =
5760 "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
5761 "?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
5762 "!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
5763
5764static bool rb_test_started __initdata;
5765
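 /* Test payload: the string length followed by the string bytes */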
5766struct rb_item {
5767 int size;
5768 char str[];
5769};
5770
5771static __init int rb_write_something(struct rb_test_data *data, bool nested)
5772{
5773 struct ring_buffer_event *event;
5774 struct rb_item *item;
5775 bool started;
5776 int event_len;
5777 int size;
5778 int len;
5779 int cnt;
5780
5781 /* Make nested writes differ from normal writes */
5782 cnt = data->cnt + (nested ? 27 : 0);
5783
5784 /* Multiply cnt by ~e, to make some unique increment */
5785 size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
5786
5787 len = size + sizeof(struct rb_item);
5788
5789 started = rb_test_started;
5790 /* read rb_test_started before checking if the buffer is enabled */
5791 smp_rmb();
5792
5793 event = ring_buffer_lock_reserve(data->buffer, len);
5794 if (!event) {
5795 /* Ignore dropped events before the test starts */
5796 if (started) {
5797 if (nested)
5798 data->bytes_dropped_nested += len;
5799 else
5800 data->bytes_dropped += len;
5801 }
5802 return len;
5803 }
5804
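 /* The reserved event may be larger than requested due to alignment padding */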
5805 event_len = ring_buffer_event_length(event);
5806
5807 if (RB_WARN_ON(data->buffer, event_len < len))
5808 goto out;
5809
5810 item = ring_buffer_event_data(event);
5811 item->size = size;
5812 memcpy(item->str, rb_string, size);
5813
5814 if (nested) {
5815 data->bytes_alloc_nested += event_len;
5816 data->bytes_written_nested += len;
5817 data->events_nested++;
5818 if (!data->min_size_nested || len < data->min_size_nested)
5819 data->min_size_nested = len;
5820 if (len > data->max_size_nested)
5821 data->max_size_nested = len;
5822 } else {
5823 data->bytes_alloc += event_len;
5824 data->bytes_written += len;
5825 data->events++;
5826 if (!data->min_size || len < data->min_size)
5827 data->min_size = len;
5828 if (len > data->max_size)
5829 data->max_size = len;
5830 }
5831
5832 out:
5833 ring_buffer_unlock_commit(data->buffer, event);
5834
5835 return 0;
5836}
5837
5838static __init int rb_test(void *arg)
5839{
5840 struct rb_test_data *data = arg;
5841
5842 while (!kthread_should_stop()) {
5843 rb_write_something(data, false);
5844 data->cnt++;
5845
5846 set_current_state(TASK_INTERRUPTIBLE);
5847 /* Now sleep between a min of 100-300us and a max of 1ms */
5848 usleep_range(((data->cnt % 3) + 1) * 100, 1000);
5849 }
5850
5851 return 0;
5852}
5853
5854static __init void rb_ipi(void *ignore)
5855{
5856 struct rb_test_data *data;
5857 int cpu = smp_processor_id();
5858
5859 data = &rb_data[cpu];
5860 rb_write_something(data, true);
5861}
5862
5863static __init int rb_hammer_test(void *arg)
5864{
5865 while (!kthread_should_stop()) {
5866
5867 /* Send an IPI to all cpus to write data! */
5868 smp_call_function(rb_ipi, NULL, 1);
5869 /* No sleep, but for non preempt, let others run */
5870 schedule();
5871 }
5872
5873 return 0;
5874}
5875
5876static __init int test_ringbuffer(void)
5877{
5878 struct task_struct *rb_hammer;
5879 struct trace_buffer *buffer;
5880 int cpu;
5881 int ret = 0;
5882
5883 if (security_locked_down(LOCKDOWN_TRACEFS)) {
5884 pr_warn("Lockdown is enabled, skipping ring buffer tests\n");
5885 return 0;
5886 }
5887
5888 pr_info("Running ring buffer tests...\n");
5889
5890 buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
5891 if (WARN_ON(!buffer))
5892 return 0;
5893
5894 /* Disable the buffer so that the threads can't write to it yet */
5895 ring_buffer_record_off(buffer);
5896
5897 for_each_online_cpu(cpu) {
5898 rb_data[cpu].buffer = buffer;
5899 rb_data[cpu].cpu = cpu;
5900 rb_data[cpu].cnt = cpu;
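 /* Start one writer thread pinned to each online CPU */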
5901 rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu],
5902 cpu, "rbtester/%u");
5903 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
5904 pr_cont("FAILED\n");
5905 ret = PTR_ERR(rb_threads[cpu]);
5906 goto out_free;
5907 }
5908 }
5909
5910 /* Now create the rb hammer thread */
5911 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
5912 if (WARN_ON(IS_ERR(rb_hammer))) {
5913 pr_cont("FAILED\n");
5914 ret = PTR_ERR(rb_hammer);
5915 goto out_free;
5916 }
5917
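 /* All writer threads are running, so enable the buffer */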
5918 ring_buffer_record_on(buffer);
5919
5920 /*
5921  * Show the buffer is enabled before setting rb_test_started.
5922  * There is a small race where events could be dropped before
5923  * the writer threads see rb_test_started, but when a ring
5924  * buffer gets enabled there is always some delay before other
5925  * CPUs see it, so those early drops do not matter. We only care
5926  * about events dropped after the threads see the buffer active.
5927  */
5928 smp_wmb();
5929 rb_test_started = true;
5930
5931 set_current_state(TASK_INTERRUPTIBLE);
5932 /* Just run for 10 seconds */;
5933 schedule_timeout(10 * HZ);
5934
5935 kthread_stop(rb_hammer);
5936
5937 out_free:
5938 for_each_online_cpu(cpu) {
5939 if (!rb_threads[cpu])
5940 break;
5941 kthread_stop(rb_threads[cpu]);
5942 }
5943 if (ret) {
5944 ring_buffer_free(buffer);
5945 return ret;
5946 }
5947
5948 /* Report the per-CPU results */
5949 pr_info("finished\n");
5950 for_each_online_cpu(cpu) {
5951 struct ring_buffer_event *event;
5952 struct rb_test_data *data = &rb_data[cpu];
5953 struct rb_item *item;
5954 unsigned long total_events;
5955 unsigned long total_dropped;
5956 unsigned long total_written;
5957 unsigned long total_alloc;
5958 unsigned long total_read = 0;
5959 unsigned long total_size = 0;
5960 unsigned long total_len = 0;
5961 unsigned long total_lost = 0;
5962 unsigned long lost;
5963 int big_event_size;
5964 int small_event_size;
5965
5966 ret = -1;
5967
5968 total_events = data->events + data->events_nested;
5969 total_written = data->bytes_written + data->bytes_written_nested;
5970 total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
5971 total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
5972
5973 big_event_size = data->max_size + data->max_size_nested;
5974 small_event_size = data->min_size + data->min_size_nested;
5975
5976 pr_info("CPU %d:\n", cpu);
5977 pr_info(" events: %ld\n", total_events);
5978 pr_info(" dropped bytes: %ld\n", total_dropped);
5979 pr_info(" alloced bytes: %ld\n", total_alloc);
5980 pr_info(" written bytes: %ld\n", total_written);
5981 pr_info(" biggest event: %d\n", big_event_size);
5982 pr_info(" smallest event: %d\n", small_event_size);
5983
5984 if (RB_WARN_ON(buffer, total_dropped))
5985 break;
5986
5987 ret = 0;
5988
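 /* Consume every event on this CPU and verify its payload */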
5989 while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
5990 total_lost += lost;
5991 item = ring_buffer_event_data(event);
5992 total_len += ring_buffer_event_length(event);
5993 total_size += item->size + sizeof(struct rb_item);
5994 if (memcmp(&item->str[0], rb_string, item->size) != 0) {
5995 pr_info("FAILED!\n");
5996 pr_info("buffer had: %.*s\n", item->size, item->str);
5997 pr_info("expected: %.*s\n", item->size, rb_string);
5998 RB_WARN_ON(buffer, 1);
5999 ret = -1;
6000 break;
6001 }
6002 total_read++;
6003 }
6004 if (ret)
6005 break;
6006
6007 ret = -1;
6008
6009 pr_info(" read events: %ld\n", total_read);
6010 pr_info(" lost events: %ld\n", total_lost);
6011 pr_info(" total events: %ld\n", total_lost + total_read);
6012 pr_info(" recorded len bytes: %ld\n", total_len);
6013 pr_info(" recorded size bytes: %ld\n", total_size);
6014 if (total_lost)
6015 pr_info(" With dropped events, record len and size may not match\n"
6016 " alloced and written from above\n");
6017 if (!total_lost) {
6018 if (RB_WARN_ON(buffer, total_len != total_alloc ||
6019 total_size != total_written))
6020 break;
6021 }
6022 if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
6023 break;
6024
6025 ret = 0;
6026 }
6027 if (!ret)
6028 pr_info("Ring buffer PASSED!\n");
6029
6030 ring_buffer_free(buffer);
6031 return 0;
6032}
6033
6034late_initcall(test_ringbuffer);
6035#endif
6036