/*
 * Generic ring buffer
 */

#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/fs.h>

#include <asm/local.h>
#include "trace.h"

static void update_pages_handler(struct work_struct *work);

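/*
 * ring_buffer_print_entry_header - describe the compressed event header
 * layout (type_len, time_delta, array and the special type values) in
 * the given trace_seq for the debugfs format files.
 */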
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	int ret;

	ret = trace_seq_printf(s, "# compressed entry header\n");
	ret = trace_seq_printf(s, "\ttype_len : 5 bits\n");
	ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n");
	ret = trace_seq_printf(s, "\tarray : 32 bits\n");
	ret = trace_seq_printf(s, "\n");
	ret = trace_seq_printf(s, "\tpadding : type == %d\n",
			       RINGBUF_TYPE_PADDING);
	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
			       RINGBUF_TYPE_TIME_EXTEND);
	ret = trace_seq_printf(s, "\tdata max type_len == %d\n",
			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return ret;
}

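/*
 * Global on/off switch for all ring buffers.  Once the DISABLED bit is
 * set (see tracing_off_permanent() below), recording cannot be
 * re-enabled.
 */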
enum {
	RB_BUFFERS_ON_BIT	= 0,
	RB_BUFFERS_DISABLED_BIT	= 1,
};

enum {
	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
};

static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;

#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)

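/**
 * tracing_off_permanent - permanently disable ring buffers
 *
 * This function, once called, will disable all ring buffers
 * permanently.
 */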
void tracing_off_permanent(void)
{
	set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
}

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U

#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}

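/*
 * Return the total length of the event, including the event header,
 * based on the type_len encoding.
 */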
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		BUG();
	}

	return 0;
}

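/*
 * Return the total length of the event including any TIME_EXTEND event
 * that precedes the data.
 */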
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}

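/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */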
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

static void *
rb_event_data(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	if (event->type_len)
		return (void *)&event->array[0];

	return (void *)&event->array[1];
}

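/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */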
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)	\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

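/*
 * Flags set in a data page's commit field when events were lost:
 * RB_MISSED_EVENTS marks that events were overwritten before being
 * read, RB_MISSED_STORED marks that the missed-event count is stored
 * at the end of the page.
 */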
#define RB_MISSED_EVENTS	(1 << 31)
#define RB_MISSED_STORED	(1 << 30)

struct buffer_data_page {
	u64		time_stamp;	/* page time stamp */
	local_t		commit;		/* write committed index */
	unsigned char	data[];		/* data of buffer page */
};

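/*
 * struct buffer_page - bookkeeping for one page of the ring buffer.
 * The list member links the page into the ring's circular page list;
 * 'page' points at the actual buffer_data_page holding the time stamp,
 * commit count and data.
 */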
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* actual data page */
};

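/*
 * The low 20 bits of the write and entries counters hold the real
 * value; the upper bits count nested writers so the counters can be
 * reset atomically when the tail moves to a new page (see
 * rb_tail_page_update()).
 */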
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}

size_t ring_buffer_page_len(void *page)
{
	return local_read(&((struct buffer_data_page *)page)->commit)
		+ BUF_PAGE_HDR_SIZE;
}

static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

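/*
 * ring_buffer_print_page_header - describe the buffer_data_page layout
 * (time stamp, commit counter, overwrite flag and data area) for the
 * debugfs format files.
 */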
int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;
	int ret;

	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			       "offset:0;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)sizeof(field.time_stamp),
			       (unsigned int)is_signed_type(u64));

	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), commit),
			       (unsigned int)sizeof(field.commit),
			       (unsigned int)is_signed_type(long));

	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), commit),
			       1,
			       (unsigned int)is_signed_type(long));

	ret = trace_seq_printf(s, "\tfield: char data;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), data),
			       (unsigned int)BUF_PAGE_SIZE,
			       (unsigned int)is_signed_type(char));

	return ret;
}

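/*
 * Per-cpu buffer state.  head_page, tail_page and commit_page track
 * where readers and writers are in the circular page list; the
 * reader_page is a spare page that is swapped in for the head page
 * when a reader wants a full page of data.
 */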
struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	struct ring_buffer		*buffer;
	raw_spinlock_t			reader_lock;
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	unsigned int			nr_pages;
	struct list_head		*pages;
	struct buffer_page		*head_page;
	struct buffer_page		*tail_page;
	struct buffer_page		*commit_page;
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	local_t				entries_bytes;
	local_t				commit_overrun;
	local_t				overrun;
	local_t				entries;
	local_t				committing;
	local_t				commits;
	unsigned long			read;
	unsigned long			read_bytes;
	u64				write_stamp;
	u64				read_stamp;

	int				nr_pages_to_update;
	struct list_head		new_pages;
	struct work_struct		update_pages_work;
	struct completion		update_done;
};

struct ring_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

#ifdef CONFIG_HOTPLUG_CPU
	struct notifier_block		cpu_notify;
#endif
	u64				(*clock)(void);
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
};

#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})

#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct ring_buffer *buffer)
{
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_no_resched_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
				      int cpu, u64 *ts)
{
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);

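/*
 * The position of the head page is tracked by flag bits in the two
 * least significant bits of the 'next' pointer of the page before it:
 *
 *   HEAD   - the following page is the current head (where readers
 *            swap in their spare page)
 *   UPDATE - a writer is in the middle of moving the head forward
 *
 * Writers in overwrite mode push the head page forward with cmpxchg
 * updates on these bits; MOVED is returned when a cmpxchg loses the
 * race because another CPU already changed the pointer.
 */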
#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL

#define RB_FLAG_MASK		3UL

#define RB_PAGE_MOVED		4UL

static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}

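/*
 * rb_is_head_page - return the flag state of @page by inspecting the
 * next pointer of the previous page.  Returns RB_PAGE_MOVED if the
 * list has already been changed under us.
 */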
649static inline int
650rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
651 struct buffer_page *page, struct list_head *list)
652{
653 unsigned long val;
654
655 val = (unsigned long)list->next;
656
657 if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
658 return RB_PAGE_MOVED;
659
660 return val & RB_FLAG_MASK;
661}
662
663
664
665
666
667
668
669
670static int rb_is_reader_page(struct buffer_page *page)
671{
672 struct list_head *list = page->list.prev;
673
674 return rb_list_head(list->next) != &page->list;
675}
676
677
678
679
680static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
681 struct list_head *list)
682{
683 unsigned long *ptr;
684
685 ptr = (unsigned long *)&list->next;
686 *ptr |= RB_PAGE_HEAD;
687 *ptr &= ~RB_PAGE_UPDATE;
688}
689
690
691
692
693static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
694{
695 struct buffer_page *head;
696
697 head = cpu_buffer->head_page;
698 if (!head)
699 return;
700
701
702
703
704 rb_set_list_to_head(cpu_buffer, head->list.prev);
705}
706
707static void rb_list_head_clear(struct list_head *list)
708{
709 unsigned long *ptr = (unsigned long *)&list->next;
710
711 *ptr &= ~RB_FLAG_MASK;
712}
713
714
715
716
717static void
718rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
719{
720 struct list_head *hd;
721
722
723 rb_list_head_clear(cpu_buffer->pages);
724
725 list_for_each(hd, cpu_buffer->pages)
726 rb_list_head_clear(hd);
727}
728
729static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
730 struct buffer_page *head,
731 struct buffer_page *prev,
732 int old_flag, int new_flag)
733{
734 struct list_head *list;
735 unsigned long val = (unsigned long)&head->list;
736 unsigned long ret;
737
738 list = &prev->list;
739
740 val &= ~RB_FLAG_MASK;
741
742 ret = cmpxchg((unsigned long *)&list->next,
743 val | old_flag, val | new_flag);
744
745
746 if ((ret & ~RB_FLAG_MASK) != val)
747 return RB_PAGE_MOVED;
748
749 return ret & RB_FLAG_MASK;
750}
751
752static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
753 struct buffer_page *head,
754 struct buffer_page *prev,
755 int old_flag)
756{
757 return rb_head_page_set(cpu_buffer, head, prev,
758 old_flag, RB_PAGE_UPDATE);
759}
760
761static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
762 struct buffer_page *head,
763 struct buffer_page *prev,
764 int old_flag)
765{
766 return rb_head_page_set(cpu_buffer, head, prev,
767 old_flag, RB_PAGE_HEAD);
768}
769
770static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
771 struct buffer_page *head,
772 struct buffer_page *prev,
773 int old_flag)
774{
775 return rb_head_page_set(cpu_buffer, head, prev,
776 old_flag, RB_PAGE_NORMAL);
777}
778
779static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
780 struct buffer_page **bpage)
781{
782 struct list_head *p = rb_list_head((*bpage)->list.next);
783
784 *bpage = list_entry(p, struct buffer_page, list);
785}
786
787static struct buffer_page *
788rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
789{
790 struct buffer_page *head;
791 struct buffer_page *page;
792 struct list_head *list;
793 int i;
794
795 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
796 return NULL;
797
798
799 list = cpu_buffer->pages;
800 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
801 return NULL;
802
803 page = head = cpu_buffer->head_page;
804
805
806
807
808
809
810 for (i = 0; i < 3; i++) {
811 do {
812 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
813 cpu_buffer->head_page = page;
814 return page;
815 }
816 rb_inc_page(cpu_buffer, &page);
817 } while (page != head);
818 }
819
820 RB_WARN_ON(cpu_buffer, 1);
821
822 return NULL;
823}
824
825static int rb_head_page_replace(struct buffer_page *old,
826 struct buffer_page *new)
827{
828 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
829 unsigned long val;
830 unsigned long ret;
831
832 val = *ptr & ~RB_FLAG_MASK;
833 val |= RB_PAGE_HEAD;
834
835 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
836
837 return ret == val;
838}
839
840
841
842
843
844
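/*
 * rb_tail_page_update - try to move the tail page over to @next_page.
 * The write and entries counters of the new page are reset with
 * cmpxchg so that a nested writer that already advanced them is not
 * lost.  Returns 1 if this caller moved the tail page.
 */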
845static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
846 struct buffer_page *tail_page,
847 struct buffer_page *next_page)
848{
849 struct buffer_page *old_tail;
850 unsigned long old_entries;
851 unsigned long old_write;
852 int ret = 0;
853
854
855
856
857
858
859
860
861
862
863 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
864 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
865
866
867
868
869
870 barrier();
871
872
873
874
875
876
877 if (tail_page == cpu_buffer->tail_page) {
878
879 unsigned long val = old_write & ~RB_WRITE_MASK;
880 unsigned long eval = old_entries & ~RB_WRITE_MASK;
881
882
883
884
885
886
887
888
889
890
891
892 (void)local_cmpxchg(&next_page->write, old_write, val);
893 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
894
895
896
897
898
899
900 local_set(&next_page->page->commit, 0);
901
902 old_tail = cmpxchg(&cpu_buffer->tail_page,
903 tail_page, next_page);
904
905 if (old_tail == tail_page)
906 ret = 1;
907 }
908
909 return ret;
910}
911
912static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
913 struct buffer_page *bpage)
914{
915 unsigned long val = (unsigned long)bpage;
916
917 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
918 return 1;
919
920 return 0;
921}
922
923
924
925
926static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
927 struct list_head *list)
928{
929 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
930 return 1;
931 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
932 return 1;
933 return 0;
934}
935
936
937
938
939
940
941
942
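/*
 * rb_check_pages - integrity check of the per-cpu page list.
 *
 * As a safety measure we check to make sure the data pages have not
 * been corrupted (the list still links correctly in both directions).
 * Returns 0 on success, -1 on failure.
 */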
943static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
944{
945 struct list_head *head = cpu_buffer->pages;
946 struct buffer_page *bpage, *tmp;
947
948
949 if (cpu_buffer->head_page)
950 rb_set_head_page(cpu_buffer);
951
952 rb_head_page_deactivate(cpu_buffer);
953
954 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
955 return -1;
956 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
957 return -1;
958
959 if (rb_check_list(cpu_buffer, head))
960 return -1;
961
962 list_for_each_entry_safe(bpage, tmp, head, list) {
963 if (RB_WARN_ON(cpu_buffer,
964 bpage->list.next->prev != &bpage->list))
965 return -1;
966 if (RB_WARN_ON(cpu_buffer,
967 bpage->list.prev->next != &bpage->list))
968 return -1;
969 if (rb_check_list(cpu_buffer, &bpage->list))
970 return -1;
971 }
972
973 rb_head_page_activate(cpu_buffer);
974
975 return 0;
976}
977
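/*
 * __rb_allocate_pages - allocate @nr_pages buffer pages on @cpu's node.
 * __GFP_NORETRY is used so that a large resize request fails with
 * -ENOMEM instead of invoking the OOM killer; any partially built list
 * is freed on failure.
 */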
978static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
979{
980 int i;
981 struct buffer_page *bpage, *tmp;
982
983 for (i = 0; i < nr_pages; i++) {
984 struct page *page;
985
986
987
988
989
990 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
991 GFP_KERNEL | __GFP_NORETRY,
992 cpu_to_node(cpu));
993 if (!bpage)
994 goto free_pages;
995
996 list_add(&bpage->list, pages);
997
998 page = alloc_pages_node(cpu_to_node(cpu),
999 GFP_KERNEL | __GFP_NORETRY, 0);
1000 if (!page)
1001 goto free_pages;
1002 bpage->page = page_address(page);
1003 rb_init_page(bpage->page);
1004 }
1005
1006 return 0;
1007
1008free_pages:
1009 list_for_each_entry_safe(bpage, tmp, pages, list) {
1010 list_del_init(&bpage->list);
1011 free_buffer_page(bpage);
1012 }
1013
1014 return -ENOMEM;
1015}
1016
1017static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1018 unsigned nr_pages)
1019{
1020 LIST_HEAD(pages);
1021
1022 WARN_ON(!nr_pages);
1023
1024 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1025 return -ENOMEM;
1026
1027
1028
1029
1030
1031
1032 cpu_buffer->pages = pages.next;
1033 list_del(&pages);
1034
1035 cpu_buffer->nr_pages = nr_pages;
1036
1037 rb_check_pages(cpu_buffer);
1038
1039 return 0;
1040}
1041
1042static struct ring_buffer_per_cpu *
1043rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1044{
1045 struct ring_buffer_per_cpu *cpu_buffer;
1046 struct buffer_page *bpage;
1047 struct page *page;
1048 int ret;
1049
1050 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1051 GFP_KERNEL, cpu_to_node(cpu));
1052 if (!cpu_buffer)
1053 return NULL;
1054
1055 cpu_buffer->cpu = cpu;
1056 cpu_buffer->buffer = buffer;
1057 raw_spin_lock_init(&cpu_buffer->reader_lock);
1058 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1059 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1060 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1061 init_completion(&cpu_buffer->update_done);
1062
1063 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1064 GFP_KERNEL, cpu_to_node(cpu));
1065 if (!bpage)
1066 goto fail_free_buffer;
1067
1068 rb_check_bpage(cpu_buffer, bpage);
1069
1070 cpu_buffer->reader_page = bpage;
1071 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1072 if (!page)
1073 goto fail_free_reader;
1074 bpage->page = page_address(page);
1075 rb_init_page(bpage->page);
1076
1077 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1078 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1079
1080 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1081 if (ret < 0)
1082 goto fail_free_reader;
1083
1084 cpu_buffer->head_page
1085 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1086 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1087
1088 rb_head_page_activate(cpu_buffer);
1089
1090 return cpu_buffer;
1091
1092 fail_free_reader:
1093 free_buffer_page(cpu_buffer->reader_page);
1094
1095 fail_free_buffer:
1096 kfree(cpu_buffer);
1097 return NULL;
1098}
1099
1100static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1101{
1102 struct list_head *head = cpu_buffer->pages;
1103 struct buffer_page *bpage, *tmp;
1104
1105 free_buffer_page(cpu_buffer->reader_page);
1106
1107 rb_head_page_deactivate(cpu_buffer);
1108
1109 if (head) {
1110 list_for_each_entry_safe(bpage, tmp, head, list) {
1111 list_del_init(&bpage->list);
1112 free_buffer_page(bpage);
1113 }
1114 bpage = list_entry(head, struct buffer_page, list);
1115 free_buffer_page(bpage);
1116 }
1117
1118 kfree(cpu_buffer);
1119}
1120
1121#ifdef CONFIG_HOTPLUG_CPU
1122static int rb_cpu_notify(struct notifier_block *self,
1123 unsigned long action, void *hcpu);
1124#endif
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
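/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 */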
1136struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1137 struct lock_class_key *key)
1138{
1139 struct ring_buffer *buffer;
1140 int bsize;
1141 int cpu, nr_pages;
1142
1143
1144 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1145 GFP_KERNEL);
1146 if (!buffer)
1147 return NULL;
1148
1149 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1150 goto fail_free_buffer;
1151
1152 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1153 buffer->flags = flags;
1154 buffer->clock = trace_clock_local;
1155 buffer->reader_lock_key = key;
1156
1157
1158 if (nr_pages < 2)
1159 nr_pages = 2;
1160
1161
1162
1163
1164
1165
1166#ifdef CONFIG_HOTPLUG_CPU
1167 get_online_cpus();
1168 cpumask_copy(buffer->cpumask, cpu_online_mask);
1169#else
1170 cpumask_copy(buffer->cpumask, cpu_possible_mask);
1171#endif
1172 buffer->cpus = nr_cpu_ids;
1173
1174 bsize = sizeof(void *) * nr_cpu_ids;
1175 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1176 GFP_KERNEL);
1177 if (!buffer->buffers)
1178 goto fail_free_cpumask;
1179
1180 for_each_buffer_cpu(buffer, cpu) {
1181 buffer->buffers[cpu] =
1182 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1183 if (!buffer->buffers[cpu])
1184 goto fail_free_buffers;
1185 }
1186
1187#ifdef CONFIG_HOTPLUG_CPU
1188 buffer->cpu_notify.notifier_call = rb_cpu_notify;
1189 buffer->cpu_notify.priority = 0;
1190 register_cpu_notifier(&buffer->cpu_notify);
1191#endif
1192
1193 put_online_cpus();
1194 mutex_init(&buffer->mutex);
1195
1196 return buffer;
1197
1198 fail_free_buffers:
1199 for_each_buffer_cpu(buffer, cpu) {
1200 if (buffer->buffers[cpu])
1201 rb_free_cpu_buffer(buffer->buffers[cpu]);
1202 }
1203 kfree(buffer->buffers);
1204
1205 fail_free_cpumask:
1206 free_cpumask_var(buffer->cpumask);
1207 put_online_cpus();
1208
1209 fail_free_buffer:
1210 kfree(buffer);
1211 return NULL;
1212}
1213EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
1214
1215
1216
1217
1218
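/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */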
1219void
1220ring_buffer_free(struct ring_buffer *buffer)
1221{
1222 int cpu;
1223
1224 get_online_cpus();
1225
1226#ifdef CONFIG_HOTPLUG_CPU
1227 unregister_cpu_notifier(&buffer->cpu_notify);
1228#endif
1229
1230 for_each_buffer_cpu(buffer, cpu)
1231 rb_free_cpu_buffer(buffer->buffers[cpu]);
1232
1233 put_online_cpus();
1234
1235 kfree(buffer->buffers);
1236 free_cpumask_var(buffer->cpumask);
1237
1238 kfree(buffer);
1239}
1240EXPORT_SYMBOL_GPL(ring_buffer_free);
1241
1242void ring_buffer_set_clock(struct ring_buffer *buffer,
1243 u64 (*clock)(void))
1244{
1245 buffer->clock = clock;
1246}
1247
1248static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1249
1250static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1251{
1252 return local_read(&bpage->entries) & RB_WRITE_MASK;
1253}
1254
1255static inline unsigned long rb_page_write(struct buffer_page *bpage)
1256{
1257 return local_read(&bpage->write) & RB_WRITE_MASK;
1258}
1259
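/*
 * rb_remove_pages - unlink and free @nr_pages pages from this cpu
 * buffer, starting after the current tail page.  Unread entries on the
 * removed pages are accounted as overruns.
 */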
1260static int
1261rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1262{
1263 struct list_head *tail_page, *to_remove, *next_page;
1264 struct buffer_page *to_remove_page, *tmp_iter_page;
1265 struct buffer_page *last_page, *first_page;
1266 unsigned int nr_removed;
1267 unsigned long head_bit;
1268 int page_entries;
1269
1270 head_bit = 0;
1271
1272 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1273 atomic_inc(&cpu_buffer->record_disabled);
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283 tail_page = &cpu_buffer->tail_page->list;
1284
1285
1286
1287
1288
1289 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1290 tail_page = rb_list_head(tail_page->next);
1291 to_remove = tail_page;
1292
1293
1294 first_page = list_entry(rb_list_head(to_remove->next),
1295 struct buffer_page, list);
1296
1297 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1298 to_remove = rb_list_head(to_remove)->next;
1299 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1300 }
1301
1302 next_page = rb_list_head(to_remove)->next;
1303
1304
1305
1306
1307
1308
1309 tail_page->next = (struct list_head *)((unsigned long)next_page |
1310 head_bit);
1311 next_page = rb_list_head(next_page);
1312 next_page->prev = tail_page;
1313
1314
1315 cpu_buffer->pages = next_page;
1316
1317
1318 if (head_bit)
1319 cpu_buffer->head_page = list_entry(next_page,
1320 struct buffer_page, list);
1321
1322
1323
1324
1325
1326 cpu_buffer->read = 0;
1327
1328
1329 atomic_dec(&cpu_buffer->record_disabled);
1330 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1331
1332 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1333
1334
1335 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1336 list);
1337 tmp_iter_page = first_page;
1338
1339 do {
1340 to_remove_page = tmp_iter_page;
1341 rb_inc_page(cpu_buffer, &tmp_iter_page);
1342
1343
1344 page_entries = rb_page_entries(to_remove_page);
1345 if (page_entries) {
1346
1347
1348
1349
1350
1351
1352 local_add(page_entries, &cpu_buffer->overrun);
1353 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1354 }
1355
1356
1357
1358
1359
1360 free_buffer_page(to_remove_page);
1361 nr_removed--;
1362
1363 } while (to_remove_page != last_page);
1364
1365 RB_WARN_ON(cpu_buffer, nr_removed);
1366
1367 return nr_removed == 0;
1368}
1369
1370static int
1371rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1372{
1373 struct list_head *pages = &cpu_buffer->new_pages;
1374 int retries, success;
1375
1376 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391 retries = 10;
1392 success = 0;
1393 while (retries--) {
1394 struct list_head *head_page, *prev_page, *r;
1395 struct list_head *last_page, *first_page;
1396 struct list_head *head_page_with_bit;
1397
1398 head_page = &rb_set_head_page(cpu_buffer)->list;
1399 prev_page = head_page->prev;
1400
1401 first_page = pages->next;
1402 last_page = pages->prev;
1403
1404 head_page_with_bit = (struct list_head *)
1405 ((unsigned long)head_page | RB_PAGE_HEAD);
1406
1407 last_page->next = head_page_with_bit;
1408 first_page->prev = prev_page;
1409
1410 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1411
1412 if (r == head_page_with_bit) {
1413
1414
1415
1416
1417
1418 head_page->prev = last_page;
1419 success = 1;
1420 break;
1421 }
1422 }
1423
1424 if (success)
1425 INIT_LIST_HEAD(pages);
1426
1427
1428
1429
1430 RB_WARN_ON(cpu_buffer, !success);
1431 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1432
1433
1434 if (!success) {
1435 struct buffer_page *bpage, *tmp;
1436 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1437 list) {
1438 list_del_init(&bpage->list);
1439 free_buffer_page(bpage);
1440 }
1441 }
1442 return success;
1443}
1444
1445static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1446{
1447 int success;
1448
1449 if (cpu_buffer->nr_pages_to_update > 0)
1450 success = rb_insert_pages(cpu_buffer);
1451 else
1452 success = rb_remove_pages(cpu_buffer,
1453 -cpu_buffer->nr_pages_to_update);
1454
1455 if (success)
1456 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1457}
1458
1459static void update_pages_handler(struct work_struct *work)
1460{
1461 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1462 struct ring_buffer_per_cpu, update_pages_work);
1463 rb_update_pages(cpu_buffer);
1464 complete(&cpu_buffer->update_done);
1465}
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
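/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size.
 * @cpu_id: the cpu buffer to resize
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns the new size on success, or a negative error code on failure.
 */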
1476int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1477 int cpu_id)
1478{
1479 struct ring_buffer_per_cpu *cpu_buffer;
1480 unsigned nr_pages;
1481 int cpu, err = 0;
1482
1483
1484
1485
1486 if (!buffer)
1487 return size;
1488
1489
1490 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1491 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1492 return size;
1493
1494 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1495 size *= BUF_PAGE_SIZE;
1496
1497
1498 if (size < BUF_PAGE_SIZE * 2)
1499 size = BUF_PAGE_SIZE * 2;
1500
1501 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1502
1503
1504
1505
1506
1507
1508 if (atomic_read(&buffer->resize_disabled))
1509 return -EBUSY;
1510
1511
1512 mutex_lock(&buffer->mutex);
1513
1514 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1515
1516 for_each_buffer_cpu(buffer, cpu) {
1517 cpu_buffer = buffer->buffers[cpu];
1518
1519 cpu_buffer->nr_pages_to_update = nr_pages -
1520 cpu_buffer->nr_pages;
1521
1522
1523
1524 if (cpu_buffer->nr_pages_to_update <= 0)
1525 continue;
1526
1527
1528
1529
1530 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1531 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1532 &cpu_buffer->new_pages, cpu)) {
1533
1534 err = -ENOMEM;
1535 goto out_err;
1536 }
1537 }
1538
1539 get_online_cpus();
1540
1541
1542
1543
1544
1545 for_each_buffer_cpu(buffer, cpu) {
1546 cpu_buffer = buffer->buffers[cpu];
1547 if (!cpu_buffer->nr_pages_to_update)
1548 continue;
1549
1550 if (cpu_online(cpu))
1551 schedule_work_on(cpu,
1552 &cpu_buffer->update_pages_work);
1553 else
1554 rb_update_pages(cpu_buffer);
1555 }
1556
1557
1558 for_each_buffer_cpu(buffer, cpu) {
1559 cpu_buffer = buffer->buffers[cpu];
1560 if (!cpu_buffer->nr_pages_to_update)
1561 continue;
1562
1563 if (cpu_online(cpu))
1564 wait_for_completion(&cpu_buffer->update_done);
1565 cpu_buffer->nr_pages_to_update = 0;
1566 }
1567
1568 put_online_cpus();
1569 } else {
1570 cpu_buffer = buffer->buffers[cpu_id];
1571
1572 if (nr_pages == cpu_buffer->nr_pages)
1573 goto out;
1574
1575 cpu_buffer->nr_pages_to_update = nr_pages -
1576 cpu_buffer->nr_pages;
1577
1578 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1579 if (cpu_buffer->nr_pages_to_update > 0 &&
1580 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1581 &cpu_buffer->new_pages, cpu_id)) {
1582 err = -ENOMEM;
1583 goto out_err;
1584 }
1585
1586 get_online_cpus();
1587
1588 if (cpu_online(cpu_id)) {
1589 schedule_work_on(cpu_id,
1590 &cpu_buffer->update_pages_work);
1591 wait_for_completion(&cpu_buffer->update_done);
1592 } else
1593 rb_update_pages(cpu_buffer);
1594
1595 cpu_buffer->nr_pages_to_update = 0;
1596 put_online_cpus();
1597 }
1598
1599 out:
1600
1601
1602
1603
1604
1605
1606
1607 if (atomic_read(&buffer->record_disabled)) {
1608 atomic_inc(&buffer->record_disabled);
1609
1610
1611
1612
1613
1614
1615 synchronize_sched();
1616 for_each_buffer_cpu(buffer, cpu) {
1617 cpu_buffer = buffer->buffers[cpu];
1618 rb_check_pages(cpu_buffer);
1619 }
1620 atomic_dec(&buffer->record_disabled);
1621 }
1622
1623 mutex_unlock(&buffer->mutex);
1624 return size;
1625
1626 out_err:
1627 for_each_buffer_cpu(buffer, cpu) {
1628 struct buffer_page *bpage, *tmp;
1629
1630 cpu_buffer = buffer->buffers[cpu];
1631 cpu_buffer->nr_pages_to_update = 0;
1632
1633 if (list_empty(&cpu_buffer->new_pages))
1634 continue;
1635
1636 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1637 list) {
1638 list_del_init(&bpage->list);
1639 free_buffer_page(bpage);
1640 }
1641 }
1642 mutex_unlock(&buffer->mutex);
1643 return err;
1644}
1645EXPORT_SYMBOL_GPL(ring_buffer_resize);
1646
1647void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1648{
1649 mutex_lock(&buffer->mutex);
1650 if (val)
1651 buffer->flags |= RB_FL_OVERWRITE;
1652 else
1653 buffer->flags &= ~RB_FL_OVERWRITE;
1654 mutex_unlock(&buffer->mutex);
1655}
1656EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1657
1658static inline void *
1659__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1660{
1661 return bpage->data + index;
1662}
1663
1664static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1665{
1666 return bpage->page->data + index;
1667}
1668
1669static inline struct ring_buffer_event *
1670rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1671{
1672 return __rb_page_index(cpu_buffer->reader_page,
1673 cpu_buffer->reader_page->read);
1674}
1675
1676static inline struct ring_buffer_event *
1677rb_iter_head_event(struct ring_buffer_iter *iter)
1678{
1679 return __rb_page_index(iter->head_page, iter->head);
1680}
1681
1682static inline unsigned rb_page_commit(struct buffer_page *bpage)
1683{
1684 return local_read(&bpage->page->commit);
1685}
1686
1687
1688static inline unsigned rb_page_size(struct buffer_page *bpage)
1689{
1690 return rb_page_commit(bpage);
1691}
1692
1693static inline unsigned
1694rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1695{
1696 return rb_page_commit(cpu_buffer->commit_page);
1697}
1698
1699static inline unsigned
1700rb_event_index(struct ring_buffer_event *event)
1701{
1702 unsigned long addr = (unsigned long)event;
1703
1704 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1705}
1706
1707static inline int
1708rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1709 struct ring_buffer_event *event)
1710{
1711 unsigned long addr = (unsigned long)event;
1712 unsigned long index;
1713
1714 index = rb_event_index(event);
1715 addr &= PAGE_MASK;
1716
1717 return cpu_buffer->commit_page->page == (void *)addr &&
1718 rb_commit_index(cpu_buffer) == index;
1719}
1720
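/*
 * rb_set_commit_to_write - advance the commit page and commit index to
 * cover everything that has actually been written.  Interrupts that
 * write while the commit is being advanced are caught by re-checking
 * the tail page at the end and starting over.
 */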
1721static void
1722rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1723{
1724 unsigned long max_count;
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734 again:
1735 max_count = cpu_buffer->nr_pages * 100;
1736
1737 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1738 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1739 return;
1740 if (RB_WARN_ON(cpu_buffer,
1741 rb_is_reader_page(cpu_buffer->tail_page)))
1742 return;
1743 local_set(&cpu_buffer->commit_page->page->commit,
1744 rb_page_write(cpu_buffer->commit_page));
1745 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1746 cpu_buffer->write_stamp =
1747 cpu_buffer->commit_page->page->time_stamp;
1748
1749 barrier();
1750 }
1751 while (rb_commit_index(cpu_buffer) !=
1752 rb_page_write(cpu_buffer->commit_page)) {
1753
1754 local_set(&cpu_buffer->commit_page->page->commit,
1755 rb_page_write(cpu_buffer->commit_page));
1756 RB_WARN_ON(cpu_buffer,
1757 local_read(&cpu_buffer->commit_page->page->commit) &
1758 ~RB_WRITE_MASK);
1759 barrier();
1760 }
1761
1762
1763 barrier();
1764
1765
1766
1767
1768
1769
1770 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
1771 goto again;
1772}
1773
1774static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1775{
1776 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
1777 cpu_buffer->reader_page->read = 0;
1778}
1779
1780static void rb_inc_iter(struct ring_buffer_iter *iter)
1781{
1782 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1783
1784
1785
1786
1787
1788
1789
1790 if (iter->head_page == cpu_buffer->reader_page)
1791 iter->head_page = rb_set_head_page(cpu_buffer);
1792 else
1793 rb_inc_page(cpu_buffer, &iter->head_page);
1794
1795 iter->read_stamp = iter->head_page->page->time_stamp;
1796 iter->head = 0;
1797}
1798
1799
1800static noinline struct ring_buffer_event *
1801rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1802{
1803 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1804
1805
1806 if (rb_event_index(event)) {
1807 event->time_delta = delta & TS_MASK;
1808 event->array[0] = delta >> TS_SHIFT;
1809 } else {
1810
1811 event->time_delta = 0;
1812 event->array[0] = 0;
1813 }
1814
1815 return skip_time_extend(event);
1816}
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
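/*
 * rb_update_event - fill in the header of a freshly reserved event.
 * The time delta is only kept if this event is the current commit;
 * otherwise it is zeroed so nested events share the commit's time
 * stamp.  If add_timestamp is set, a TIME_EXTEND event is written in
 * front of the data to hold a delta that does not fit in 27 bits.
 */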
1829static void
1830rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1831 struct ring_buffer_event *event, unsigned length,
1832 int add_timestamp, u64 delta)
1833{
1834
1835 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1836 delta = 0;
1837
1838
1839
1840
1841
1842 if (unlikely(add_timestamp)) {
1843 event = rb_add_time_stamp(event, delta);
1844 length -= RB_LEN_TIME_EXTEND;
1845 delta = 0;
1846 }
1847
1848 event->time_delta = delta;
1849 length -= RB_EVNT_HDR_SIZE;
1850 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1851 event->type_len = 0;
1852 event->array[0] = length;
1853 } else
1854 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1855}
1856
1857
1858
1859
1860
1861
1862
1863
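/*
 * rb_handle_head_page - writer side of moving the head page forward
 * when the tail is about to run into it (overwrite mode).  Entries on
 * the old head page are counted as overruns.  Returns 0 on success, 1
 * if a reader or another writer already moved the head, and -1 on
 * error.
 */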
1864static int
1865rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1866 struct buffer_page *tail_page,
1867 struct buffer_page *next_page)
1868{
1869 struct buffer_page *new_head;
1870 int entries;
1871 int type;
1872 int ret;
1873
1874 entries = rb_page_entries(next_page);
1875
1876
1877
1878
1879
1880
1881 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1882 RB_PAGE_HEAD);
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895 switch (type) {
1896 case RB_PAGE_HEAD:
1897
1898
1899
1900
1901
1902 local_add(entries, &cpu_buffer->overrun);
1903 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1904
1905
1906
1907
1908
1909
1910
1911 break;
1912
1913 case RB_PAGE_UPDATE:
1914
1915
1916
1917
1918 break;
1919 case RB_PAGE_NORMAL:
1920
1921
1922
1923
1924
1925 return 1;
1926 case RB_PAGE_MOVED:
1927
1928
1929
1930
1931
1932 return 1;
1933 default:
1934 RB_WARN_ON(cpu_buffer, 1);
1935 return -1;
1936 }
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952 new_head = next_page;
1953 rb_inc_page(cpu_buffer, &new_head);
1954
1955 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1956 RB_PAGE_NORMAL);
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966 switch (ret) {
1967 case RB_PAGE_HEAD:
1968 case RB_PAGE_NORMAL:
1969
1970 break;
1971 default:
1972 RB_WARN_ON(cpu_buffer, 1);
1973 return -1;
1974 }
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986 if (ret == RB_PAGE_NORMAL) {
1987
1988
1989
1990
1991 if (cpu_buffer->tail_page != tail_page &&
1992 cpu_buffer->tail_page != next_page)
1993 rb_head_page_set_normal(cpu_buffer, new_head,
1994 next_page,
1995 RB_PAGE_HEAD);
1996 }
1997
1998
1999
2000
2001
2002
2003 if (type == RB_PAGE_HEAD) {
2004 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2005 tail_page,
2006 RB_PAGE_UPDATE);
2007 if (RB_WARN_ON(cpu_buffer,
2008 ret != RB_PAGE_UPDATE))
2009 return -1;
2010 }
2011
2012 return 0;
2013}
2014
2015static unsigned rb_calculate_event_length(unsigned length)
2016{
2017 struct ring_buffer_event event;
2018
2019
2020 if (!length)
2021 length = 1;
2022
2023 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2024 length += sizeof(event.array[0]);
2025
2026 length += RB_EVNT_HDR_SIZE;
2027 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2028
2029 return length;
2030}
2031
2032static inline void
2033rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2034 struct buffer_page *tail_page,
2035 unsigned long tail, unsigned long length)
2036{
2037 struct ring_buffer_event *event;
2038
2039
2040
2041
2042
2043 if (tail >= BUF_PAGE_SIZE) {
2044
2045
2046
2047
2048
2049 if (tail == BUF_PAGE_SIZE)
2050 tail_page->real_end = 0;
2051
2052 local_sub(length, &tail_page->write);
2053 return;
2054 }
2055
2056 event = __rb_page_index(tail_page, tail);
2057 kmemcheck_annotate_bitfield(event, bitfield);
2058
2059
2060 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2061
2062
2063
2064
2065
2066
2067 tail_page->real_end = tail;
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2081
2082
2083
2084 rb_event_set_padding(event);
2085
2086
2087 local_sub(length, &tail_page->write);
2088 return;
2089 }
2090
2091
2092 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2093 event->type_len = RINGBUF_TYPE_PADDING;
2094
2095 event->time_delta = 1;
2096
2097
2098 length = (tail + length) - BUF_PAGE_SIZE;
2099 local_sub(length, &tail_page->write);
2100}
2101
2102
2103
2104
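/*
 * rb_move_tail - slow path of the reserve, taken when an event crosses
 * the end of the tail page.  The tail is moved to the next page,
 * possibly pushing the head page forward in overwrite mode.  Returns
 * ERR_PTR(-EAGAIN) so the caller retries the reserve, or NULL if the
 * event must be dropped.
 */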
2105static noinline struct ring_buffer_event *
2106rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2107 unsigned long length, unsigned long tail,
2108 struct buffer_page *tail_page, u64 ts)
2109{
2110 struct buffer_page *commit_page = cpu_buffer->commit_page;
2111 struct ring_buffer *buffer = cpu_buffer->buffer;
2112 struct buffer_page *next_page;
2113 int ret;
2114
2115 next_page = tail_page;
2116
2117 rb_inc_page(cpu_buffer, &next_page);
2118
2119
2120
2121
2122
2123
2124 if (unlikely(next_page == commit_page)) {
2125 local_inc(&cpu_buffer->commit_overrun);
2126 goto out_reset;
2127 }
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2144
2145
2146
2147
2148
2149 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2150
2151
2152
2153
2154 if (!(buffer->flags & RB_FL_OVERWRITE))
2155 goto out_reset;
2156
2157 ret = rb_handle_head_page(cpu_buffer,
2158 tail_page,
2159 next_page);
2160 if (ret < 0)
2161 goto out_reset;
2162 if (ret)
2163 goto out_again;
2164 } else {
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175 if (unlikely((cpu_buffer->commit_page !=
2176 cpu_buffer->tail_page) &&
2177 (cpu_buffer->commit_page ==
2178 cpu_buffer->reader_page))) {
2179 local_inc(&cpu_buffer->commit_overrun);
2180 goto out_reset;
2181 }
2182 }
2183 }
2184
2185 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
2186 if (ret) {
2187
2188
2189
2190
2191 ts = rb_time_stamp(buffer);
2192 next_page->page->time_stamp = ts;
2193 }
2194
2195 out_again:
2196
2197 rb_reset_tail(cpu_buffer, tail_page, tail, length);
2198
2199
2200 return ERR_PTR(-EAGAIN);
2201
2202 out_reset:
2203
2204 rb_reset_tail(cpu_buffer, tail_page, tail, length);
2205
2206 return NULL;
2207}
2208
2209static struct ring_buffer_event *
2210__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2211 unsigned long length, u64 ts,
2212 u64 delta, int add_timestamp)
2213{
2214 struct buffer_page *tail_page;
2215 struct ring_buffer_event *event;
2216 unsigned long tail, write;
2217
2218
2219
2220
2221
2222
2223 if (unlikely(add_timestamp))
2224 length += RB_LEN_TIME_EXTEND;
2225
2226 tail_page = cpu_buffer->tail_page;
2227 write = local_add_return(length, &tail_page->write);
2228
2229
2230 write &= RB_WRITE_MASK;
2231 tail = write - length;
2232
2233
2234 if (unlikely(write > BUF_PAGE_SIZE))
2235 return rb_move_tail(cpu_buffer, length, tail,
2236 tail_page, ts);
2237
2238
2239
2240 event = __rb_page_index(tail_page, tail);
2241 kmemcheck_annotate_bitfield(event, bitfield);
2242 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
2243
2244 local_inc(&tail_page->entries);
2245
2246
2247
2248
2249
2250 if (!tail)
2251 tail_page->page->time_stamp = ts;
2252
2253
2254 local_add(length, &cpu_buffer->entries_bytes);
2255
2256 return event;
2257}
2258
2259static inline int
2260rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2261 struct ring_buffer_event *event)
2262{
2263 unsigned long new_index, old_index;
2264 struct buffer_page *bpage;
2265 unsigned long index;
2266 unsigned long addr;
2267
2268 new_index = rb_event_index(event);
2269 old_index = new_index + rb_event_ts_length(event);
2270 addr = (unsigned long)event;
2271 addr &= PAGE_MASK;
2272
2273 bpage = cpu_buffer->tail_page;
2274
2275 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2276 unsigned long write_mask =
2277 local_read(&bpage->write) & ~RB_WRITE_MASK;
2278 unsigned long event_length = rb_event_length(event);
2279
2280
2281
2282
2283
2284
2285 old_index += write_mask;
2286 new_index += write_mask;
2287 index = local_cmpxchg(&bpage->write, old_index, new_index);
2288 if (index == old_index) {
2289
2290 local_sub(event_length, &cpu_buffer->entries_bytes);
2291 return 1;
2292 }
2293 }
2294
2295
2296 return 0;
2297}
2298
2299static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2300{
2301 local_inc(&cpu_buffer->committing);
2302 local_inc(&cpu_buffer->commits);
2303}
2304
2305static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2306{
2307 unsigned long commits;
2308
2309 if (RB_WARN_ON(cpu_buffer,
2310 !local_read(&cpu_buffer->committing)))
2311 return;
2312
2313 again:
2314 commits = local_read(&cpu_buffer->commits);
2315
2316 barrier();
2317 if (local_read(&cpu_buffer->committing) == 1)
2318 rb_set_commit_to_write(cpu_buffer);
2319
2320 local_dec(&cpu_buffer->committing);
2321
2322
2323 barrier();
2324
2325
2326
2327
2328
2329
2330 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2331 !local_read(&cpu_buffer->committing)) {
2332 local_inc(&cpu_buffer->committing);
2333 goto again;
2334 }
2335}
2336
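/*
 * rb_reserve_next_event - compute the time delta for the new event,
 * decide whether a TIME_EXTEND is needed, and reserve the space,
 * retrying if the tail page had to be moved in the meantime.
 */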
2337static struct ring_buffer_event *
2338rb_reserve_next_event(struct ring_buffer *buffer,
2339 struct ring_buffer_per_cpu *cpu_buffer,
2340 unsigned long length)
2341{
2342 struct ring_buffer_event *event;
2343 u64 ts, delta;
2344 int nr_loops = 0;
2345 int add_timestamp;
2346 u64 diff;
2347
2348 rb_start_commit(cpu_buffer);
2349
2350#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2351
2352
2353
2354
2355
2356
2357 barrier();
2358 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2359 local_dec(&cpu_buffer->committing);
2360 local_dec(&cpu_buffer->commits);
2361 return NULL;
2362 }
2363#endif
2364
2365 length = rb_calculate_event_length(length);
2366 again:
2367 add_timestamp = 0;
2368 delta = 0;
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2380 goto out_fail;
2381
2382 ts = rb_time_stamp(cpu_buffer->buffer);
2383 diff = ts - cpu_buffer->write_stamp;
2384
2385
2386 barrier();
2387
2388
2389 if (likely(ts >= cpu_buffer->write_stamp)) {
2390 delta = diff;
2391 if (unlikely(test_time_stamp(delta))) {
2392 int local_clock_stable = 1;
2393#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2394 local_clock_stable = sched_clock_stable;
2395#endif
2396 WARN_ONCE(delta > (1ULL << 59),
2397 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2398 (unsigned long long)delta,
2399 (unsigned long long)ts,
2400 (unsigned long long)cpu_buffer->write_stamp,
2401 local_clock_stable ? "" :
2402 "If you just came from a suspend/resume,\n"
2403 "please switch to the trace global clock:\n"
2404 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2405 add_timestamp = 1;
2406 }
2407 }
2408
2409 event = __rb_reserve_next(cpu_buffer, length, ts,
2410 delta, add_timestamp);
2411 if (unlikely(PTR_ERR(event) == -EAGAIN))
2412 goto again;
2413
2414 if (!event)
2415 goto out_fail;
2416
2417 return event;
2418
2419 out_fail:
2420 rb_end_commit(cpu_buffer);
2421 return NULL;
2422}
2423
2424#ifdef CONFIG_TRACING
2425
2426#define TRACE_RECURSIVE_DEPTH 16
2427
2428
2429static noinline void trace_recursive_fail(void)
2430{
2431
2432 tracing_off_permanent();
2433
2434 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
2435 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2436 trace_recursion_buffer(),
2437 hardirq_count() >> HARDIRQ_SHIFT,
2438 softirq_count() >> SOFTIRQ_SHIFT,
2439 in_nmi());
2440
2441 WARN_ON_ONCE(1);
2442}
2443
2444static inline int trace_recursive_lock(void)
2445{
2446 trace_recursion_inc();
2447
2448 if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
2449 return 0;
2450
2451 trace_recursive_fail();
2452
2453 return -1;
2454}
2455
2456static inline void trace_recursive_unlock(void)
2457{
2458 WARN_ON_ONCE(!trace_recursion_buffer());
2459
2460 trace_recursion_dec();
2461}
2462
2463#else
2464
2465#define trace_recursive_lock() (0)
2466#define trace_recursive_unlock() do { } while (0)
2467
2468#endif
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
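/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is
 * returned.
 */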
2485struct ring_buffer_event *
2486ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2487{
2488 struct ring_buffer_per_cpu *cpu_buffer;
2489 struct ring_buffer_event *event;
2490 int cpu;
2491
2492 if (ring_buffer_flags != RB_BUFFERS_ON)
2493 return NULL;
2494
2495
2496 preempt_disable_notrace();
2497
2498 if (atomic_read(&buffer->record_disabled))
2499 goto out_nocheck;
2500
2501 if (trace_recursive_lock())
2502 goto out_nocheck;
2503
2504 cpu = raw_smp_processor_id();
2505
2506 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2507 goto out;
2508
2509 cpu_buffer = buffer->buffers[cpu];
2510
2511 if (atomic_read(&cpu_buffer->record_disabled))
2512 goto out;
2513
2514 if (length > BUF_MAX_DATA_SIZE)
2515 goto out;
2516
2517 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2518 if (!event)
2519 goto out;
2520
2521 return event;
2522
2523 out:
2524 trace_recursive_unlock();
2525
2526 out_nocheck:
2527 preempt_enable_notrace();
2528 return NULL;
2529}
2530EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
2531
2532static void
2533rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2534 struct ring_buffer_event *event)
2535{
2536 u64 delta;
2537
2538
2539
2540
2541
2542 if (rb_event_is_commit(cpu_buffer, event)) {
2543
2544
2545
2546
2547 if (!rb_event_index(event))
2548 cpu_buffer->write_stamp =
2549 cpu_buffer->commit_page->page->time_stamp;
2550 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2551 delta = event->array[0];
2552 delta <<= TS_SHIFT;
2553 delta += event->time_delta;
2554 cpu_buffer->write_stamp += delta;
2555 } else
2556 cpu_buffer->write_stamp += event->time_delta;
2557 }
2558}
2559
2560static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2561 struct ring_buffer_event *event)
2562{
2563 local_inc(&cpu_buffer->entries);
2564 rb_update_write_stamp(cpu_buffer, event);
2565 rb_end_commit(cpu_buffer);
2566}
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
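/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */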
2577int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2578 struct ring_buffer_event *event)
2579{
2580 struct ring_buffer_per_cpu *cpu_buffer;
2581 int cpu = raw_smp_processor_id();
2582
2583 cpu_buffer = buffer->buffers[cpu];
2584
2585 rb_commit(cpu_buffer, event);
2586
2587 trace_recursive_unlock();
2588
2589 preempt_enable_notrace();
2590
2591 return 0;
2592}
2593EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2594
2595static inline void rb_event_discard(struct ring_buffer_event *event)
2596{
2597 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2598 event = skip_time_extend(event);
2599
2600
2601 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2602 event->type_len = RINGBUF_TYPE_PADDING;
2603
2604 if (!event->time_delta)
2605 event->time_delta = 1;
2606}
2607
2608
2609
2610
2611
2612
2613
2614static inline void
2615rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2616 struct ring_buffer_event *event)
2617{
2618 unsigned long addr = (unsigned long)event;
2619 struct buffer_page *bpage = cpu_buffer->commit_page;
2620 struct buffer_page *start;
2621
2622 addr &= PAGE_MASK;
2623
2624
2625 if (likely(bpage->page == (void *)addr)) {
2626 local_dec(&bpage->entries);
2627 return;
2628 }
2629
2630
2631
2632
2633
2634 rb_inc_page(cpu_buffer, &bpage);
2635 start = bpage;
2636 do {
2637 if (bpage->page == (void *)addr) {
2638 local_dec(&bpage->entries);
2639 return;
2640 }
2641 rb_inc_page(cpu_buffer, &bpage);
2642 } while (bpage != start);
2643
2644
2645 RB_WARN_ON(cpu_buffer, 1);
2646}
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
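/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * This function only works if it is called before the item has been
 * committed. It will try to free the event from the ring buffer
 * if another event has not been added behind it.  If another event
 * was added behind it, the event is marked as discarded and the
 * commit is performed instead.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */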
2667void ring_buffer_discard_commit(struct ring_buffer *buffer,
2668 struct ring_buffer_event *event)
2669{
2670 struct ring_buffer_per_cpu *cpu_buffer;
2671 int cpu;
2672
2673
2674 rb_event_discard(event);
2675
2676 cpu = smp_processor_id();
2677 cpu_buffer = buffer->buffers[cpu];
2678
2679
2680
2681
2682
2683
2684 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
2685
2686 rb_decrement_entry(cpu_buffer, event);
2687 if (rb_try_to_discard(cpu_buffer, event))
2688 goto out;
2689
2690
2691
2692
2693
2694 rb_update_write_stamp(cpu_buffer, event);
2695 out:
2696 rb_end_commit(cpu_buffer);
2697
2698 trace_recursive_unlock();
2699
2700 preempt_enable_notrace();
2701
2702}
2703EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
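/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header)
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 */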
2718int ring_buffer_write(struct ring_buffer *buffer,
2719 unsigned long length,
2720 void *data)
2721{
2722 struct ring_buffer_per_cpu *cpu_buffer;
2723 struct ring_buffer_event *event;
2724 void *body;
2725 int ret = -EBUSY;
2726 int cpu;
2727
2728 if (ring_buffer_flags != RB_BUFFERS_ON)
2729 return -EBUSY;
2730
2731 preempt_disable_notrace();
2732
2733 if (atomic_read(&buffer->record_disabled))
2734 goto out;
2735
2736 cpu = raw_smp_processor_id();
2737
2738 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2739 goto out;
2740
2741 cpu_buffer = buffer->buffers[cpu];
2742
2743 if (atomic_read(&cpu_buffer->record_disabled))
2744 goto out;
2745
2746 if (length > BUF_MAX_DATA_SIZE)
2747 goto out;
2748
2749 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2750 if (!event)
2751 goto out;
2752
2753 body = rb_event_data(event);
2754
2755 memcpy(body, data, length);
2756
2757 rb_commit(cpu_buffer, event);
2758
2759 ret = 0;
2760 out:
2761 preempt_enable_notrace();
2762
2763 return ret;
2764}
2765EXPORT_SYMBOL_GPL(ring_buffer_write);
2766
2767static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
2768{
2769 struct buffer_page *reader = cpu_buffer->reader_page;
2770 struct buffer_page *head = rb_set_head_page(cpu_buffer);
2771 struct buffer_page *commit = cpu_buffer->commit_page;
2772
2773
2774 if (unlikely(!head))
2775 return 1;
2776
2777 return reader->read == rb_page_commit(reader) &&
2778 (commit == reader ||
2779 (commit == head &&
2780 head->read == rb_page_commit(commit)));
2781}
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792void ring_buffer_record_disable(struct ring_buffer *buffer)
2793{
2794 atomic_inc(&buffer->record_disabled);
2795}
2796EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2797
2798
2799
2800
2801
2802
2803
2804
2805void ring_buffer_record_enable(struct ring_buffer *buffer)
2806{
2807 atomic_dec(&buffer->record_disabled);
2808}
2809EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
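/**
 * ring_buffer_record_off - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * Unlike ring_buffer_record_disable(), this works like an on/off
 * switch and does not need to be paired with an enable call.
 */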
2822void ring_buffer_record_off(struct ring_buffer *buffer)
2823{
2824 unsigned int rd;
2825 unsigned int new_rd;
2826
2827 do {
2828 rd = atomic_read(&buffer->record_disabled);
2829 new_rd = rd | RB_BUFFER_OFF;
2830 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2831}
2832EXPORT_SYMBOL_GPL(ring_buffer_record_off);
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845void ring_buffer_record_on(struct ring_buffer *buffer)
2846{
2847 unsigned int rd;
2848 unsigned int new_rd;
2849
2850 do {
2851 rd = atomic_read(&buffer->record_disabled);
2852 new_rd = rd & ~RB_BUFFER_OFF;
2853 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2854}
2855EXPORT_SYMBOL_GPL(ring_buffer_record_on);
2856
2857
2858
2859
2860
2861
2862
2863int ring_buffer_record_is_on(struct ring_buffer *buffer)
2864{
2865 return !atomic_read(&buffer->record_disabled);
2866}
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
2879{
2880 struct ring_buffer_per_cpu *cpu_buffer;
2881
2882 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2883 return;
2884
2885 cpu_buffer = buffer->buffers[cpu];
2886 atomic_inc(&cpu_buffer->record_disabled);
2887}
2888EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2899{
2900 struct ring_buffer_per_cpu *cpu_buffer;
2901
2902 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2903 return;
2904
2905 cpu_buffer = buffer->buffers[cpu];
2906 atomic_dec(&cpu_buffer->record_disabled);
2907}
2908EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2909
2910
2911
2912
2913
2914
2915
2916static inline unsigned long
2917rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2918{
2919 return local_read(&cpu_buffer->entries) -
2920 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2921}
2922
/**
 * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
2928unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
2929{
2930 unsigned long flags;
2931 struct ring_buffer_per_cpu *cpu_buffer;
2932 struct buffer_page *bpage;
	unsigned long ret = 0;
2934
2935 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2936 return 0;
2937
2938 cpu_buffer = buffer->buffers[cpu];
2939 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2940
	/*
	 * if the tail is on reader_page, oldest time stamp is on the reader
	 * page
	 */
	if (cpu_buffer->tail_page == cpu_buffer->reader_page)
		bpage = cpu_buffer->reader_page;
	else
		bpage = rb_set_head_page(cpu_buffer);
	/* rb_set_head_page() may fail; avoid a NULL dereference */
	if (bpage)
		ret = bpage->page->time_stamp;
2949 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2950
2951 return ret;
2952}
2953EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
2954
/**
 * ring_buffer_bytes_cpu - get the number of bytes not yet consumed in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */
2960unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
2961{
2962 struct ring_buffer_per_cpu *cpu_buffer;
2963 unsigned long ret;
2964
2965 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2966 return 0;
2967
2968 cpu_buffer = buffer->buffers[cpu];
2969 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
2970
2971 return ret;
2972}
2973EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
2974
/**
 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the entries from.
 */
2980unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2981{
2982 struct ring_buffer_per_cpu *cpu_buffer;
2983
2984 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2985 return 0;
2986
2987 cpu_buffer = buffer->buffers[cpu];
2988
2989 return rb_num_of_entries(cpu_buffer);
2990}
2991EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2992
/**
 * ring_buffer_overrun_cpu - get the number of events overwritten because the
 * buffer wrapped around
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
2998unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
2999{
3000 struct ring_buffer_per_cpu *cpu_buffer;
3001 unsigned long ret;
3002
3003 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3004 return 0;
3005
3006 cpu_buffer = buffer->buffers[cpu];
3007 ret = local_read(&cpu_buffer->overrun);
3008
3009 return ret;
3010}
3011EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3012
/**
 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
 * commits failing due to the buffer wrapping around while there are
 * uncommitted events, such as during an interrupt storm.
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */
3018unsigned long
3019ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3020{
3021 struct ring_buffer_per_cpu *cpu_buffer;
3022 unsigned long ret;
3023
3024 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3025 return 0;
3026
3027 cpu_buffer = buffer->buffers[cpu];
3028 ret = local_read(&cpu_buffer->commit_overrun);
3029
3030 return ret;
3031}
3032EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3033
/**
 * ring_buffer_entries - get the number of entries in a buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of entries in the ring buffer
 * (all CPU entries)
 */
3041unsigned long ring_buffer_entries(struct ring_buffer *buffer)
3042{
3043 struct ring_buffer_per_cpu *cpu_buffer;
3044 unsigned long entries = 0;
3045 int cpu;
3046
	/* if you care about this being correct, lock the buffer */
3048 for_each_buffer_cpu(buffer, cpu) {
3049 cpu_buffer = buffer->buffers[cpu];
3050 entries += rb_num_of_entries(cpu_buffer);
3051 }
3052
3053 return entries;
3054}
3055EXPORT_SYMBOL_GPL(ring_buffer_entries);
3056
/**
 * ring_buffer_overruns - get the number of overruns in buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of overruns in the ring buffer
 * (all CPU entries)
 */
3064unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
3065{
3066 struct ring_buffer_per_cpu *cpu_buffer;
3067 unsigned long overruns = 0;
3068 int cpu;
3069
3070
3071 for_each_buffer_cpu(buffer, cpu) {
3072 cpu_buffer = buffer->buffers[cpu];
3073 overruns += local_read(&cpu_buffer->overrun);
3074 }
3075
3076 return overruns;
3077}
3078EXPORT_SYMBOL_GPL(ring_buffer_overruns);
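/*
 * Usage sketch (illustrative only): the statistics above are typically
 * sampled per CPU. for_each_buffer_cpu() is internal to this file, so an
 * external caller would use an ordinary CPU iterator:
 *
 *	for_each_online_cpu(cpu)
 *		pr_info("cpu %d: %lu entries, %lu overruns\n", cpu,
 *			ring_buffer_entries_cpu(buffer, cpu),
 *			ring_buffer_overrun_cpu(buffer, cpu));
 */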
3079
3080static void rb_iter_reset(struct ring_buffer_iter *iter)
3081{
3082 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3083
	/* Iterator usage is expected to have record disabled */
3085 if (list_empty(&cpu_buffer->reader_page->list)) {
3086 iter->head_page = rb_set_head_page(cpu_buffer);
3087 if (unlikely(!iter->head_page))
3088 return;
3089 iter->head = iter->head_page->read;
3090 } else {
3091 iter->head_page = cpu_buffer->reader_page;
3092 iter->head = cpu_buffer->reader_page->read;
3093 }
3094 if (iter->head)
3095 iter->read_stamp = cpu_buffer->read_stamp;
3096 else
3097 iter->read_stamp = iter->head_page->page->time_stamp;
3098 iter->cache_reader_page = cpu_buffer->reader_page;
3099 iter->cache_read = cpu_buffer->read;
3100}
3101
/**
 * ring_buffer_iter_reset - reset an iterator
 * @iter: The iterator to reset
 *
 * Resets the iterator, so that it will start from the beginning
 * again.
 */
3109void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3110{
3111 struct ring_buffer_per_cpu *cpu_buffer;
3112 unsigned long flags;
3113
3114 if (!iter)
3115 return;
3116
3117 cpu_buffer = iter->cpu_buffer;
3118
3119 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3120 rb_iter_reset(iter);
3121 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3122}
3123EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3124
/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 */
3129int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3130{
3131 struct ring_buffer_per_cpu *cpu_buffer;
3132
3133 cpu_buffer = iter->cpu_buffer;
3134
3135 return iter->head_page == cpu_buffer->commit_page &&
3136 iter->head == rb_commit_index(cpu_buffer);
3137}
3138EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3139
3140static void
3141rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3142 struct ring_buffer_event *event)
3143{
3144 u64 delta;
3145
3146 switch (event->type_len) {
3147 case RINGBUF_TYPE_PADDING:
3148 return;
3149
3150 case RINGBUF_TYPE_TIME_EXTEND:
3151 delta = event->array[0];
3152 delta <<= TS_SHIFT;
3153 delta += event->time_delta;
3154 cpu_buffer->read_stamp += delta;
3155 return;
3156
3157 case RINGBUF_TYPE_TIME_STAMP:
3158
3159 return;
3160
3161 case RINGBUF_TYPE_DATA:
3162 cpu_buffer->read_stamp += event->time_delta;
3163 return;
3164
3165 default:
3166 BUG();
3167 }
3168 return;
3169}
3170
3171static void
3172rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3173 struct ring_buffer_event *event)
3174{
3175 u64 delta;
3176
3177 switch (event->type_len) {
3178 case RINGBUF_TYPE_PADDING:
3179 return;
3180
3181 case RINGBUF_TYPE_TIME_EXTEND:
3182 delta = event->array[0];
3183 delta <<= TS_SHIFT;
3184 delta += event->time_delta;
3185 iter->read_stamp += delta;
3186 return;
3187
3188 case RINGBUF_TYPE_TIME_STAMP:
3189
3190 return;
3191
3192 case RINGBUF_TYPE_DATA:
3193 iter->read_stamp += event->time_delta;
3194 return;
3195
3196 default:
3197 BUG();
3198 }
3199 return;
3200}
3201
3202static struct buffer_page *
3203rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3204{
3205 struct buffer_page *reader = NULL;
3206 unsigned long overwrite;
3207 unsigned long flags;
3208 int nr_loops = 0;
3209 int ret;
3210
3211 local_irq_save(flags);
3212 arch_spin_lock(&cpu_buffer->lock);
3213
3214 again:
	/*
	 * This should normally only loop twice. But because the
	 * start of the reader inserts an empty page, it causes
	 * a case where we will loop three times. There should be no
	 * reason to loop four times (that we know of).
	 */
3221 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3222 reader = NULL;
3223 goto out;
3224 }
3225
3226 reader = cpu_buffer->reader_page;
3227
	/* If there's more to read, return this page */
3229 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3230 goto out;
3231
	/* Never should we have an index greater than the size */
3233 if (RB_WARN_ON(cpu_buffer,
3234 cpu_buffer->reader_page->read > rb_page_size(reader)))
3235 goto out;
3236
	/* check if we caught up to the tail */
3238 reader = NULL;
3239 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3240 goto out;
3241
	/* Don't bother swapping if the ring buffer is empty */
3243 if (rb_num_of_entries(cpu_buffer) == 0)
3244 goto out;
3245
	/*
	 * Reset the reader page to size zero.
	 */
3249 local_set(&cpu_buffer->reader_page->write, 0);
3250 local_set(&cpu_buffer->reader_page->entries, 0);
3251 local_set(&cpu_buffer->reader_page->page->commit, 0);
3252 cpu_buffer->reader_page->real_end = 0;
3253
3254 spin:
	/*
	 * Splice the empty reader page into the list around the head.
	 */
3258 reader = rb_set_head_page(cpu_buffer);
3259 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3260 cpu_buffer->reader_page->list.prev = reader->list.prev;
3261
	/*
	 * cpu_buffer->pages just needs to point to the buffer, it
	 *  doesn't need to be the real head of the list.
	 */
3267 cpu_buffer->pages = reader->list.prev;
3268
	/* The reader page will be pointing to the new head */
3270 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3271
	/*
	 * We want to make sure we read the overruns after we set up our
	 * pointers to the next object. The writer side does a
	 * cmpxchg to cross pages which acts as the mb on the writer
	 * side. Note, the reader will constantly fail the swap
	 * while the writer is updating the pointers, so this
	 * guarantees that the overwrite recorded here is the one we
	 * want to compare with the last_overrun.
	 */
3281 smp_mb();
3282 overwrite = local_read(&(cpu_buffer->overrun));
3283
3284
	/*
	 * Here's the tricky part.
	 *
	 * We need to move the pointer past the header page.
	 * But we can only do that if a writer is not currently
	 * moving it. The page before the header page has the
	 * flag bit '1' set if it is pointing to the page we want.
	 * But if the writer is in the process of moving it
	 * then it will be '2' or already moved '0'.
	 */
3295 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3296
	/*
	 * If we did not convert it, then we must try again.
	 */
3300 if (!ret)
3301 goto spin;
3302
	/*
	 * We succeeded in replacing the page.
	 *
	 * Now make the new head point back to the reader page.
	 */
3308 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3309 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3310
	/* Finally update the reader page to the new head */
3312 cpu_buffer->reader_page = reader;
3313 rb_reset_reader_page(cpu_buffer);
3314
3315 if (overwrite != cpu_buffer->last_overrun) {
3316 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3317 cpu_buffer->last_overrun = overwrite;
3318 }
3319
3320 goto again;
3321
3322 out:
3323 arch_spin_unlock(&cpu_buffer->lock);
3324 local_irq_restore(flags);
3325
3326 return reader;
3327}
3328
3329static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3330{
3331 struct ring_buffer_event *event;
3332 struct buffer_page *reader;
3333 unsigned length;
3334
3335 reader = rb_get_reader_page(cpu_buffer);
3336
	/* This function should not be called when buffer is empty */
3338 if (RB_WARN_ON(cpu_buffer, !reader))
3339 return;
3340
3341 event = rb_reader_event(cpu_buffer);
3342
3343 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3344 cpu_buffer->read++;
3345
3346 rb_update_read_stamp(cpu_buffer, event);
3347
3348 length = rb_event_length(event);
3349 cpu_buffer->reader_page->read += length;
3350}
3351
3352static void rb_advance_iter(struct ring_buffer_iter *iter)
3353{
3354 struct ring_buffer_per_cpu *cpu_buffer;
3355 struct ring_buffer_event *event;
3356 unsigned length;
3357
3358 cpu_buffer = iter->cpu_buffer;
3359
	/*
	 * Check if we are at the end of the buffer.
	 */
3363 if (iter->head >= rb_page_size(iter->head_page)) {
		/* discarded commits can make the page empty */
3365 if (iter->head_page == cpu_buffer->commit_page)
3366 return;
3367 rb_inc_iter(iter);
3368 return;
3369 }
3370
3371 event = rb_iter_head_event(iter);
3372
3373 length = rb_event_length(event);
3374
	/*
	 * This should not be called to advance the header if we are
	 * at the tail of the buffer.
	 */
3379 if (RB_WARN_ON(cpu_buffer,
3380 (iter->head_page == cpu_buffer->commit_page) &&
3381 (iter->head + length > rb_commit_index(cpu_buffer))))
3382 return;
3383
3384 rb_update_iter_read_stamp(iter, event);
3385
3386 iter->head += length;
3387
	/* check for end of page padding */
3389 if ((iter->head >= rb_page_size(iter->head_page)) &&
3390 (iter->head_page != cpu_buffer->commit_page))
3391 rb_advance_iter(iter);
3392}
3393
3394static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3395{
3396 return cpu_buffer->lost_events;
3397}
3398
3399static struct ring_buffer_event *
3400rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3401 unsigned long *lost_events)
3402{
3403 struct ring_buffer_event *event;
3404 struct buffer_page *reader;
3405 int nr_loops = 0;
3406
3407 again:
	/*
	 * We repeat when a time extend is encountered.
	 * Since the time extend is always attached to a data event,
	 * we should never loop more than once.
	 * (We never hit the following condition more than twice).
	 */
3414 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3415 return NULL;
3416
3417 reader = rb_get_reader_page(cpu_buffer);
3418 if (!reader)
3419 return NULL;
3420
3421 event = rb_reader_event(cpu_buffer);
3422
3423 switch (event->type_len) {
3424 case RINGBUF_TYPE_PADDING:
3425 if (rb_null_event(event))
3426 RB_WARN_ON(cpu_buffer, 1);
		/*
		 * Because the writer could be discarding every
		 * event it creates (which would probably be bad)
		 * if we were to go back to "again" then we may never
		 * return NULL.
		 */
3435 return event;
3436
3437 case RINGBUF_TYPE_TIME_EXTEND:
		/* Internal data, OK to advance */
3439 rb_advance_reader(cpu_buffer);
3440 goto again;
3441
3442 case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
3444 rb_advance_reader(cpu_buffer);
3445 goto again;
3446
3447 case RINGBUF_TYPE_DATA:
3448 if (ts) {
3449 *ts = cpu_buffer->read_stamp + event->time_delta;
3450 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3451 cpu_buffer->cpu, ts);
3452 }
3453 if (lost_events)
3454 *lost_events = rb_lost_events(cpu_buffer);
3455 return event;
3456
3457 default:
3458 BUG();
3459 }
3460
3461 return NULL;
3462}
3463EXPORT_SYMBOL_GPL(ring_buffer_peek);
3464
3465static struct ring_buffer_event *
3466rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3467{
3468 struct ring_buffer *buffer;
3469 struct ring_buffer_per_cpu *cpu_buffer;
3470 struct ring_buffer_event *event;
3471 int nr_loops = 0;
3472
3473 cpu_buffer = iter->cpu_buffer;
3474 buffer = cpu_buffer->buffer;
3475
	/*
	 * Check if someone performed a consuming read to
	 * the buffer. A consuming read invalidates the iterator
	 * and we need to reset the iterator in this case.
	 */
3481 if (unlikely(iter->cache_read != cpu_buffer->read ||
3482 iter->cache_reader_page != cpu_buffer->reader_page))
3483 rb_iter_reset(iter);
3484
3485 again:
3486 if (ring_buffer_iter_empty(iter))
3487 return NULL;
3488
	/*
	 * We repeat when a time extend is encountered.
	 * Since the time extend is always attached to a data event,
	 * we should never loop more than once.
	 * (We never hit the following condition more than twice).
	 */
3495 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3496 return NULL;
3497
3498 if (rb_per_cpu_empty(cpu_buffer))
3499 return NULL;
3500
3501 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3502 rb_inc_iter(iter);
3503 goto again;
3504 }
3505
3506 event = rb_iter_head_event(iter);
3507
3508 switch (event->type_len) {
3509 case RINGBUF_TYPE_PADDING:
3510 if (rb_null_event(event)) {
3511 rb_inc_iter(iter);
3512 goto again;
3513 }
3514 rb_advance_iter(iter);
3515 return event;
3516
3517 case RINGBUF_TYPE_TIME_EXTEND:
		/* Internal data, OK to advance */
3519 rb_advance_iter(iter);
3520 goto again;
3521
3522 case RINGBUF_TYPE_TIME_STAMP:
		/* FIXME: not implemented */
3524 rb_advance_iter(iter);
3525 goto again;
3526
3527 case RINGBUF_TYPE_DATA:
3528 if (ts) {
3529 *ts = iter->read_stamp + event->time_delta;
3530 ring_buffer_normalize_time_stamp(buffer,
3531 cpu_buffer->cpu, ts);
3532 }
3533 return event;
3534
3535 default:
3536 BUG();
3537 }
3538
3539 return NULL;
3540}
3541EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
3542
3543static inline int rb_ok_to_lock(void)
3544{
	/*
	 * If an NMI die dumps out the content of the ring buffer
	 * the ring buffer was locked when the NMI triggered. We don't
	 * probe locks in NMI. If the locks were taken, a deadlock
	 * would occur.
	 */
3551 if (likely(!in_nmi()))
3552 return 1;
3553
3554 tracing_off_permanent();
3555 return 0;
3556}
3557
/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */
3568struct ring_buffer_event *
3569ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3570 unsigned long *lost_events)
3571{
3572 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3573 struct ring_buffer_event *event;
3574 unsigned long flags;
3575 int dolock;
3576
3577 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3578 return NULL;
3579
3580 dolock = rb_ok_to_lock();
3581 again:
3582 local_irq_save(flags);
3583 if (dolock)
3584 raw_spin_lock(&cpu_buffer->reader_lock);
3585 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3586 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3587 rb_advance_reader(cpu_buffer);
3588 if (dolock)
3589 raw_spin_unlock(&cpu_buffer->reader_lock);
3590 local_irq_restore(flags);
3591
3592 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3593 goto again;
3594
3595 return event;
3596}
3597
/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not increment the iterator.
 */
3606struct ring_buffer_event *
3607ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3608{
3609 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3610 struct ring_buffer_event *event;
3611 unsigned long flags;
3612
3613 again:
3614 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3615 event = rb_iter_peek(iter, ts);
3616 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3617
3618 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3619 goto again;
3620
3621 return event;
3622}
3623
/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower
 * than the consumer.
 */
3635struct ring_buffer_event *
3636ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3637 unsigned long *lost_events)
3638{
3639 struct ring_buffer_per_cpu *cpu_buffer;
3640 struct ring_buffer_event *event = NULL;
3641 unsigned long flags;
3642 int dolock;
3643
3644 dolock = rb_ok_to_lock();
3645
3646 again:
	/* might be called in atomic */
3648 preempt_disable();
3649
3650 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3651 goto out;
3652
3653 cpu_buffer = buffer->buffers[cpu];
3654 local_irq_save(flags);
3655 if (dolock)
3656 raw_spin_lock(&cpu_buffer->reader_lock);
3657
3658 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3659 if (event) {
3660 cpu_buffer->lost_events = 0;
3661 rb_advance_reader(cpu_buffer);
3662 }
3663
3664 if (dolock)
3665 raw_spin_unlock(&cpu_buffer->reader_lock);
3666 local_irq_restore(flags);
3667
3668 out:
3669 preempt_enable();
3670
3671 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3672 goto again;
3673
3674 return event;
3675}
3676EXPORT_SYMBOL_GPL(ring_buffer_consume);
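/*
 * Usage sketch (illustrative only): draining one CPU's events with the
 * consuming read. process_event() is a hypothetical callback; the returned
 * event must not be used after the buffer is written to again or reset.
 *
 *	struct ring_buffer_event *event;
 *	unsigned long lost;
 *	u64 ts;
 *
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost)))
 *		process_event(ring_buffer_event_data(event), ts, lost);
 */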
3677
/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 *
 * This performs the initial preparations necessary to iterate
 * through the buffer.  Memory is allocated, buffer recording
 * is disabled, and the iterator pointer is returned to the caller.
 *
 * Disabling buffer recording prevents the reading from being
 * corrupted. This is not a consuming read, so a producer is not
 * expected to write to the buffer.
 *
 * After a sequence of ring_buffer_read_prepare calls, the user is
 * expected to make at least one call to ring_buffer_read_prepare_sync.
 * Afterwards, ring_buffer_read_start is invoked to get things going
 * for real.
 *
 * This overall must be paired with ring_buffer_read_finish.
 */
3698struct ring_buffer_iter *
3699ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3700{
3701 struct ring_buffer_per_cpu *cpu_buffer;
3702 struct ring_buffer_iter *iter;
3703
3704 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3705 return NULL;
3706
3707 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
3708 if (!iter)
3709 return NULL;
3710
3711 cpu_buffer = buffer->buffers[cpu];
3712
3713 iter->cpu_buffer = cpu_buffer;
3714
3715 atomic_inc(&buffer->resize_disabled);
3716 atomic_inc(&cpu_buffer->record_disabled);
3717
3718 return iter;
3719}
3720EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3721
/**
 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
 *
 * All previously invoked ring_buffer_read_prepare calls to prepare
 * iterators will be synchronized.  Afterwards, ring_buffer_read_start
 * calls can be made.
 */
3729void
3730ring_buffer_read_prepare_sync(void)
3731{
3732 synchronize_sched();
3733}
3734EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3735
/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @iter: The iterator returned by ring_buffer_read_prepare
 *
 * This finalizes the startup of an iteration through the buffer.
 * The iterator comes from a call to ring_buffer_read_prepare and
 * an intervening ring_buffer_read_prepare_sync must have been
 * performed.
 *
 * Must be paired with ring_buffer_read_finish.
 */
3747void
3748ring_buffer_read_start(struct ring_buffer_iter *iter)
3749{
3750 struct ring_buffer_per_cpu *cpu_buffer;
3751 unsigned long flags;
3752
3753 if (!iter)
3754 return;
3755
3756 cpu_buffer = iter->cpu_buffer;
3757
3758 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3759 arch_spin_lock(&cpu_buffer->lock);
3760 rb_iter_reset(iter);
3761 arch_spin_unlock(&cpu_buffer->lock);
3762 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3763}
3764EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3765
/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_read_prepare
 *
 * This re-enables the recording to the buffer, and frees the
 * iterator.
 */
3773void
3774ring_buffer_read_finish(struct ring_buffer_iter *iter)
3775{
3776 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3777
	/*
	 * Ring buffer is disabled from recording, here's a good place
	 * to check the integrity of the ring buffer.
	 */
3782 rb_check_pages(cpu_buffer);
3783
3784 atomic_dec(&cpu_buffer->record_disabled);
3785 atomic_dec(&cpu_buffer->buffer->resize_disabled);
3786 kfree(iter);
3787}
3788EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
3789
/**
 * ring_buffer_read - read the next item in the ring buffer by the iterator
 * @iter: The ring buffer iterator
 * @ts: The time stamp of the event read.
 *
 * This reads the next event in the ring buffer and increments the iterator.
 */
3797struct ring_buffer_event *
3798ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
3799{
3800 struct ring_buffer_event *event;
3801 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3802 unsigned long flags;
3803
3804 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3805 again:
3806 event = rb_iter_peek(iter, ts);
3807 if (!event)
3808 goto out;
3809
3810 if (event->type_len == RINGBUF_TYPE_PADDING)
3811 goto again;
3812
3813 rb_advance_iter(iter);
3814 out:
3815 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3816
3817 return event;
3818}
3819EXPORT_SYMBOL_GPL(ring_buffer_read);
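/*
 * Usage sketch (illustrative only): the full non-consuming iteration
 * sequence for one CPU. dump_event() is hypothetical; error handling is
 * reduced to the iterator check.
 *
 *	struct ring_buffer_iter *iter;
 *	struct ring_buffer_event *event;
 *	u64 ts;
 *
 *	iter = ring_buffer_read_prepare(buffer, cpu);
 *	if (!iter)
 *		return;
 *	ring_buffer_read_prepare_sync();
 *	ring_buffer_read_start(iter);
 *	while ((event = ring_buffer_read(iter, &ts)))
 *		dump_event(ring_buffer_event_data(event), ts);
 *	ring_buffer_read_finish(iter);
 */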
3820
/**
 * ring_buffer_size - return the size of the ring buffer (in bytes)
 * @buffer: The ring buffer.
 * @cpu: The CPU to get ring buffer size from.
 */
3825unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
3826{
	/*
	 * Earlier, this method returned
	 *	BUF_PAGE_SIZE * buffer->nr_pages
	 * Since the nr_pages field is now removed, we have converted this to
	 * return the per cpu buffer value.
	 */
3833 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3834 return 0;
3835
3836 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
3837}
3838EXPORT_SYMBOL_GPL(ring_buffer_size);
3839
3840static void
3841rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3842{
3843 rb_head_page_deactivate(cpu_buffer);
3844
3845 cpu_buffer->head_page
3846 = list_entry(cpu_buffer->pages, struct buffer_page, list);
3847 local_set(&cpu_buffer->head_page->write, 0);
3848 local_set(&cpu_buffer->head_page->entries, 0);
3849 local_set(&cpu_buffer->head_page->page->commit, 0);
3850
3851 cpu_buffer->head_page->read = 0;
3852
3853 cpu_buffer->tail_page = cpu_buffer->head_page;
3854 cpu_buffer->commit_page = cpu_buffer->head_page;
3855
3856 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
3857 INIT_LIST_HEAD(&cpu_buffer->new_pages);
3858 local_set(&cpu_buffer->reader_page->write, 0);
3859 local_set(&cpu_buffer->reader_page->entries, 0);
3860 local_set(&cpu_buffer->reader_page->page->commit, 0);
3861 cpu_buffer->reader_page->read = 0;
3862
3863 local_set(&cpu_buffer->commit_overrun, 0);
3864 local_set(&cpu_buffer->entries_bytes, 0);
3865 local_set(&cpu_buffer->overrun, 0);
3866 local_set(&cpu_buffer->entries, 0);
3867 local_set(&cpu_buffer->committing, 0);
3868 local_set(&cpu_buffer->commits, 0);
3869 cpu_buffer->read = 0;
3870 cpu_buffer->read_bytes = 0;
3871
3872 cpu_buffer->write_stamp = 0;
3873 cpu_buffer->read_stamp = 0;
3874
3875 cpu_buffer->lost_events = 0;
3876 cpu_buffer->last_overrun = 0;
3877
3878 rb_head_page_activate(cpu_buffer);
3879}
3880
/**
 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 * @cpu: The CPU buffer to be reset
 */
3886void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3887{
3888 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3889 unsigned long flags;
3890
3891 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3892 return;
3893
3894 atomic_inc(&buffer->resize_disabled);
3895 atomic_inc(&cpu_buffer->record_disabled);
3896
	/* Make sure all commits have finished */
3898 synchronize_sched();
3899
3900 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3901
3902 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3903 goto out;
3904
3905 arch_spin_lock(&cpu_buffer->lock);
3906
3907 rb_reset_cpu(cpu_buffer);
3908
3909 arch_spin_unlock(&cpu_buffer->lock);
3910
3911 out:
3912 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3913
3914 atomic_dec(&cpu_buffer->record_disabled);
3915 atomic_dec(&buffer->resize_disabled);
3916}
3917EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
3918
/**
 * ring_buffer_reset - reset a ring buffer
 * @buffer: The ring buffer to reset all cpu buffers
 */
3923void ring_buffer_reset(struct ring_buffer *buffer)
3924{
3925 int cpu;
3926
3927 for_each_buffer_cpu(buffer, cpu)
3928 ring_buffer_reset_cpu(buffer, cpu);
3929}
3930EXPORT_SYMBOL_GPL(ring_buffer_reset);
3931
/**
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */
3936int ring_buffer_empty(struct ring_buffer *buffer)
3937{
3938 struct ring_buffer_per_cpu *cpu_buffer;
3939 unsigned long flags;
3940 int dolock;
3941 int cpu;
3942 int ret;
3943
3944 dolock = rb_ok_to_lock();
3945
	/* yes this is racy, but if you don't like the race, lock the buffer */
3947 for_each_buffer_cpu(buffer, cpu) {
3948 cpu_buffer = buffer->buffers[cpu];
3949 local_irq_save(flags);
3950 if (dolock)
3951 raw_spin_lock(&cpu_buffer->reader_lock);
3952 ret = rb_per_cpu_empty(cpu_buffer);
3953 if (dolock)
3954 raw_spin_unlock(&cpu_buffer->reader_lock);
3955 local_irq_restore(flags);
3956
3957 if (!ret)
3958 return 0;
3959 }
3960
3961 return 1;
3962}
3963EXPORT_SYMBOL_GPL(ring_buffer_empty);
3964
/**
 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */
3970int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
3971{
3972 struct ring_buffer_per_cpu *cpu_buffer;
3973 unsigned long flags;
3974 int dolock;
3975 int ret;
3976
3977 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3978 return 1;
3979
3980 dolock = rb_ok_to_lock();
3981
3982 cpu_buffer = buffer->buffers[cpu];
3983 local_irq_save(flags);
3984 if (dolock)
3985 raw_spin_lock(&cpu_buffer->reader_lock);
3986 ret = rb_per_cpu_empty(cpu_buffer);
3987 if (dolock)
3988 raw_spin_unlock(&cpu_buffer->reader_lock);
3989 local_irq_restore(flags);
3990
3991 return ret;
3992}
3993EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
3994
3995#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and have another back up buffer lying around.
 * It is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */
4006int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4007 struct ring_buffer *buffer_b, int cpu)
4008{
4009 struct ring_buffer_per_cpu *cpu_buffer_a;
4010 struct ring_buffer_per_cpu *cpu_buffer_b;
4011 int ret = -EINVAL;
4012
4013 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4014 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4015 goto out;
4016
4017 cpu_buffer_a = buffer_a->buffers[cpu];
4018 cpu_buffer_b = buffer_b->buffers[cpu];
4019
	/* At least make sure the two buffers are somewhat the same */
4021 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4022 goto out;
4023
4024 ret = -EAGAIN;
4025
4026 if (ring_buffer_flags != RB_BUFFERS_ON)
4027 goto out;
4028
4029 if (atomic_read(&buffer_a->record_disabled))
4030 goto out;
4031
4032 if (atomic_read(&buffer_b->record_disabled))
4033 goto out;
4034
4035 if (atomic_read(&cpu_buffer_a->record_disabled))
4036 goto out;
4037
4038 if (atomic_read(&cpu_buffer_b->record_disabled))
4039 goto out;
4040
	/*
	 * We can't do a synchronize_sched here because this
	 * function can be called in atomic context.
	 * Normally this will be called from the same CPU as cpu.
	 * If not it's up to the caller to protect this.
	 */
4047 atomic_inc(&cpu_buffer_a->record_disabled);
4048 atomic_inc(&cpu_buffer_b->record_disabled);
4049
4050 ret = -EBUSY;
4051 if (local_read(&cpu_buffer_a->committing))
4052 goto out_dec;
4053 if (local_read(&cpu_buffer_b->committing))
4054 goto out_dec;
4055
4056 buffer_a->buffers[cpu] = cpu_buffer_b;
4057 buffer_b->buffers[cpu] = cpu_buffer_a;
4058
4059 cpu_buffer_b->buffer = buffer_a;
4060 cpu_buffer_a->buffer = buffer_b;
4061
4062 ret = 0;
4063
4064out_dec:
4065 atomic_dec(&cpu_buffer_a->record_disabled);
4066 atomic_dec(&cpu_buffer_b->record_disabled);
4067out:
4068 return ret;
4069}
4070EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
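/*
 * Usage sketch (illustrative only, assuming the caller allocated a spare
 * "snapshot" buffer with the same per-cpu page count): swapping one CPU's
 * live buffer with the spare so the events can be read at leisure.
 *
 *	err = ring_buffer_swap_cpu(snapshot, live, cpu);
 *	if (!err)
 *		... read the events now sitting in snapshot's cpu buffer ...
 *	else if (err == -EAGAIN)
 *		... recording was disabled, try again later ...
 */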
4071#endif
4072
/**
 * ring_buffer_alloc_read_page - allocate a page to read from buffer
 * @buffer: the buffer to allocate for.
 * @cpu: the cpu buffer to allocate.
 *
 * This function is used in conjunction with ring_buffer_read_page.
 * When reading a full page from the ring buffer, these functions
 * can be used to speed up the process. The calling function should
 * allocate a few pages first with this function. Then when it
 * needs to get pages from the ring buffer, it passes the result
 * of this function into ring_buffer_read_page, which will swap
 * the page that was allocated, with the read page of the buffer.
 *
 * Returns:
 *  The page allocated, or NULL on error.
 */
4088void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
4089{
4090 struct buffer_data_page *bpage;
4091 struct page *page;
4092
4093 page = alloc_pages_node(cpu_to_node(cpu),
4094 GFP_KERNEL | __GFP_NORETRY, 0);
4095 if (!page)
4096 return NULL;
4097
4098 bpage = page_address(page);
4099
4100 rb_init_page(bpage);
4101
4102 return bpage;
4103}
4104EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4105
/**
 * ring_buffer_free_read_page - free an allocated read page
 * @buffer: the buffer the page was allocated for
 * @data: the page to free
 *
 * Free a page allocated from ring_buffer_alloc_read_page.
 */
4113void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
4114{
4115 free_page((unsigned long)data);
4116}
4117EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4118
/**
 * ring_buffer_read_page - extract a page from the ring buffer
 * @buffer: buffer to extract from
 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
 * @len: amount to extract
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full.
 *
 * This function will pull out a page from the ring buffer and consume it.
 * @data_page must be the address of the variable that was returned
 * from ring_buffer_alloc_read_page. This is because the page might be used
 * to swap with a page in the ring buffer.
 *
 * for example:
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (!rpage)
 *		return error;
 *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(rpage, ret);
 *
 * When @full is set, the function will not return true unless
 * the writer is off the reader page.
 *
 * Note: it is up to the calling functions to handle sleeps and wakeups.
 *  The ring buffer can be used anywhere in the kernel and can not
 *  rely on any specific mechanism to wake up the waiters. The page
 *  pushed into the ring buffer must be the next sequential page.
 *
 * Returns:
 *  >=0 if data has been transferred, returns the offset of consumed data.
 *  <0 if no data has been transferred.
 */
4152int ring_buffer_read_page(struct ring_buffer *buffer,
4153 void **data_page, size_t len, int cpu, int full)
4154{
4155 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4156 struct ring_buffer_event *event;
4157 struct buffer_data_page *bpage;
4158 struct buffer_page *reader;
4159 unsigned long missed_events;
4160 unsigned long flags;
4161 unsigned int commit;
4162 unsigned int read;
4163 u64 save_timestamp;
4164 int ret = -1;
4165
4166 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4167 goto out;
4168
	/*
	 * If len is not big enough to hold the page header, then
	 * we can not copy anything.
	 */
4173 if (len <= BUF_PAGE_HDR_SIZE)
4174 goto out;
4175
4176 len -= BUF_PAGE_HDR_SIZE;
4177
4178 if (!data_page)
4179 goto out;
4180
4181 bpage = *data_page;
4182 if (!bpage)
4183 goto out;
4184
4185 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4186
4187 reader = rb_get_reader_page(cpu_buffer);
4188 if (!reader)
4189 goto out_unlock;
4190
4191 event = rb_reader_event(cpu_buffer);
4192
4193 read = reader->read;
4194 commit = rb_page_commit(reader);
4195
	/* Check if any events were dropped */
4197 missed_events = cpu_buffer->lost_events;
4198
	/*
	 * If this page has been partially read or
	 * a writer is still on the page, then
	 * we must copy the data from the page to the buffer.
	 * Otherwise, we can simply swap the page with the one passed in.
	 */
4206 if (read || (len < (commit - read)) ||
4207 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4208 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4209 unsigned int rpos = read;
4210 unsigned int pos = 0;
4211 unsigned int size;
4212
4213 if (full)
4214 goto out_unlock;
4215
4216 if (len > (commit - read))
4217 len = (commit - read);
4218
		/* Always keep the time extend and data together */
4220 size = rb_event_ts_length(event);
4221
4222 if (len < size)
4223 goto out_unlock;
4224
		/* save the current timestamp, since the user will need it */
4226 save_timestamp = cpu_buffer->read_stamp;
4227
		/* Need to copy one event at a time */
4229 do {
			/*
			 * We need the size of one event, because
			 * rb_advance_reader only advances by one event,
			 * whereas rb_event_ts_length may include the size of
			 * one or two events.
			 * We have already ensured our buffer is large enough.
			 */
4236 size = rb_event_length(event);
4237 memcpy(bpage->data + pos, rpage->data + rpos, size);
4238
4239 len -= size;
4240
4241 rb_advance_reader(cpu_buffer);
4242 rpos = reader->read;
4243 pos += size;
4244
4245 if (rpos >= commit)
4246 break;
4247
4248 event = rb_reader_event(cpu_buffer);
4249
4250 size = rb_event_ts_length(event);
4251 } while (len >= size);
4252
		/* update bpage */
4254 local_set(&bpage->commit, pos);
4255 bpage->time_stamp = save_timestamp;
4256
		/* we copied everything to the beginning */
4258 read = 0;
4259 } else {
		/* update the entry counter */
4261 cpu_buffer->read += rb_page_entries(reader);
4262 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4263
		/* swap the pages */
4265 rb_init_page(bpage);
4266 bpage = reader->page;
4267 reader->page = *data_page;
4268 local_set(&reader->write, 0);
4269 local_set(&reader->entries, 0);
4270 reader->read = 0;
4271 *data_page = bpage;
4272
		/*
		 * Use the real_end for the data size,
		 * This gives us a chance to store the lost events
		 * on the page.
		 */
4278 if (reader->real_end)
4279 local_set(&bpage->commit, reader->real_end);
4280 }
4281 ret = read;
4282
4283 cpu_buffer->lost_events = 0;
4284
4285 commit = local_read(&bpage->commit);
4286
	/*
	 * Set a flag in the commit field if we lost events
	 */
4289 if (missed_events) {
		/*
		 * If there is room at the end of the page to save the
		 * missed events, then record it there.
		 */
4293 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4294 memcpy(&bpage->data[commit], &missed_events,
4295 sizeof(missed_events));
4296 local_add(RB_MISSED_STORED, &bpage->commit);
4297 commit += sizeof(missed_events);
4298 }
4299 local_add(RB_MISSED_EVENTS, &bpage->commit);
4300 }
4301
	/*
	 * This page may be off to user land. Zero out the
	 * rest of the buffer.
	 */
4305 if (commit < BUF_PAGE_SIZE)
4306 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4307
4308 out_unlock:
4309 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4310
4311 out:
4312 return ret;
4313}
4314EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4315
4316#ifdef CONFIG_HOTPLUG_CPU
4317static int rb_cpu_notify(struct notifier_block *self,
4318 unsigned long action, void *hcpu)
4319{
4320 struct ring_buffer *buffer =
4321 container_of(self, struct ring_buffer, cpu_notify);
4322 long cpu = (long)hcpu;
4323 int cpu_i, nr_pages_same;
4324 unsigned int nr_pages;
4325
4326 switch (action) {
4327 case CPU_UP_PREPARE:
4328 case CPU_UP_PREPARE_FROZEN:
4329 if (cpumask_test_cpu(cpu, buffer->cpumask))
4330 return NOTIFY_OK;
4331
4332 nr_pages = 0;
4333 nr_pages_same = 1;
4334
4335 for_each_buffer_cpu(buffer, cpu_i) {
			/* fill in the size from first enabled cpu */
4337 if (nr_pages == 0)
4338 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4339 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4340 nr_pages_same = 0;
4341 break;
4342 }
4343 }
4344
4345 if (!nr_pages_same)
4346 nr_pages = 2;
4347 buffer->buffers[cpu] =
4348 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4349 if (!buffer->buffers[cpu]) {
4350 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4351 cpu);
4352 return NOTIFY_OK;
4353 }
4354 smp_wmb();
4355 cpumask_set_cpu(cpu, buffer->cpumask);
4356 break;
4357 case CPU_DOWN_PREPARE:
4358 case CPU_DOWN_PREPARE_FROZEN:
		/*
		 * Do nothing.
		 *  If we were to free the buffer, then the user would
		 *  lose any trace that was in the buffer.
		 */
4364 break;
4365 default:
4366 break;
4367 }
4368 return NOTIFY_OK;
4369}
4370#endif
4371