/*
 * Generic ring buffer
 */
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/fs.h>

#include <asm/local.h>
#include "trace.h"

static void update_pages_handler(struct work_struct *work);
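
/*
 * The ring buffer event header is special: its layout is described by
 * hand here and must be kept in sync with struct ring_buffer_event.
 */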
int ring_buffer_print_entry_header(struct trace_seq *s)
{
	int ret;

	ret = trace_seq_printf(s, "# compressed entry header\n");
	ret = trace_seq_printf(s, "\ttype_len : 5 bits\n");
	ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n");
	ret = trace_seq_printf(s, "\tarray : 32 bits\n");
	ret = trace_seq_printf(s, "\n");
	ret = trace_seq_printf(s, "\tpadding : type == %d\n",
			       RINGBUF_TYPE_PADDING);
	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
			       RINGBUF_TYPE_TIME_EXTEND);
	ret = trace_seq_printf(s, "\tdata max type_len == %d\n",
			       RINGBUF_TYPE_DATA_TYPE_LEN_MAX);

	return ret;
}
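/*
 * ring_buffer_flags is a global switch for all ring buffers:
 * RB_BUFFERS_ON must be set for any recording to happen, and once
 * RB_BUFFERS_DISABLED is set (see tracing_off_permanent()) recording
 * is permanently disabled.
 */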
enum {
	RB_BUFFERS_ON_BIT	= 0,
	RB_BUFFERS_DISABLED_BIT	= 1,
};

enum {
	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
};

static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;

/* Used for individual buffers (after the global counter bits) */
#define RB_BUFFER_OFF		(1 << 20)

#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
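
/**
 * tracing_off_permanent - permanently disable ring buffers
 *
 * This function, once called, will disable all ring buffers
 * permanently.
 */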
void tracing_off_permanent(void)
{
	set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
}

#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT		4U
#define RB_MAX_SMALL_DATA	(RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE	8U	/* two 32bit words */

#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
# define RB_FORCE_8BYTE_ALIGNMENT	0
# define RB_ARCH_ALIGNMENT		RB_ALIGNMENT
#else
# define RB_FORCE_8BYTE_ALIGNMENT	1
# define RB_ARCH_ALIGNMENT		8U
#endif

/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX

enum {
	RB_LEN_TIME_EXTEND = 8,
	RB_LEN_TIME_STAMP = 16,
};

#define skip_time_extend(event) \
	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))

static inline int rb_null_event(struct ring_buffer_event *event)
{
	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}

static void rb_event_set_padding(struct ring_buffer_event *event)
{
	/* padding has a NULL time_delta */
	event->type_len = RINGBUF_TYPE_PADDING;
	event->time_delta = 0;
}

static unsigned
rb_event_data_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len)
		length = event->type_len * RB_ALIGNMENT;
	else
		length = event->array[0];
	return length + RB_EVNT_HDR_SIZE;
}

/*
 * Return the length of the given event. Will return
 * the length of the time extend if the event is a
 * time extend.
 */
static inline unsigned
rb_event_length(struct ring_buffer_event *event)
{
	switch (event->type_len) {
	case RINGBUF_TYPE_PADDING:
		if (rb_null_event(event))
			/* undefined */
			return -1;
		return event->array[0] + RB_EVNT_HDR_SIZE;

	case RINGBUF_TYPE_TIME_EXTEND:
		return RB_LEN_TIME_EXTEND;

	case RINGBUF_TYPE_TIME_STAMP:
		return RB_LEN_TIME_STAMP;

	case RINGBUF_TYPE_DATA:
		return rb_event_data_length(event);
	default:
		BUG();
	}
	/* not hit */
	return 0;
}

/*
 * Return total length of time extend and data,
 * or just the event length for all other events.
 */
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
	unsigned len = 0;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
		/* time extends include the data event after it */
		len = RB_LEN_TIME_EXTEND;
		event = skip_time_extend(event);
	}
	return len + rb_event_length(event);
}
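
/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Returns the size of the data load of a data event.
 * If the event is something other than a data event, it
 * returns the size of the event itself. With the exception
 * of a TIME EXTEND, where it still returns the size of the
 * data load of the data event after it.
 */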
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length;

	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);

	length = rb_event_length(event);
	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
		return length;
	length -= RB_EVNT_HDR_SIZE;
	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
		length -= sizeof(event->array[0]);
	return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);

static void *
rb_event_data(struct ring_buffer_event *event)
{
	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
		event = skip_time_extend(event);
	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
	/* If length is in type_len, then array[0] holds the data */
	if (event->type_len)
		return (void *)&event->array[0];
	/* Otherwise length is in array[0] and array[1] has the data */
	return (void *)&event->array[1];
}
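
/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 */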
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	return rb_event_data(event);
}
EXPORT_SYMBOL_GPL(ring_buffer_event_data);

#define for_each_buffer_cpu(buffer, cpu)		\
	for_each_cpu(cpu, buffer->cpumask)

#define TS_SHIFT	27
#define TS_MASK		((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST	(~TS_MASK)

/* Flag when events were overwritten */
#define RB_MISSED_EVENTS	(1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED	(1 << 30)

struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* write committed index */
	unsigned char	 data[];	/* data of buffer page */
};
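
/*
 * Note, the buffer_page list must be first. Buffer pages are
 * allocated aligned to the cache line size, so the least
 * significant bits of the list pointers are always zero. Those
 * free bits are used to store flags in the list pointers, which
 * is what makes the ring buffer lockless.
 */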
struct buffer_page {
	struct list_head list;		/* list of buffer pages */
	local_t		 write;		/* index for next write */
	unsigned	 read;		/* index for next read */
	local_t		 entries;	/* entries on this page */
	unsigned long	 real_end;	/* real end of data */
	struct buffer_data_page *page;	/* Actual data page */
};
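
/*
 * The write and entries counters of a buffer page must be reset
 * atomically when crossing page boundaries. To synchronize this,
 * two counters are packed into one value: the low 20 bits hold the
 * actual write position or entry count, and the high bits hold a
 * count of concurrent updaters (incremented by RB_WRITE_INTCNT
 * before an update, so the reset can be done with cmpxchg).
 */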
#define RB_WRITE_MASK		0xfffff
#define RB_WRITE_INTCNT		(1 << 20)

static void rb_init_page(struct buffer_data_page *bpage)
{
	local_set(&bpage->commit, 0);
}
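
/**
 * ring_buffer_page_len - the size of data on the page.
 * @page: The page to read
 *
 * Returns the amount of data on the page, including the buffer page header.
 */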
size_t ring_buffer_page_len(void *page)
{
	return local_read(&((struct buffer_data_page *)page)->commit)
		+ BUF_PAGE_HDR_SIZE;
}

/*
 * Free the data page and the struct buffer_page that points to it.
 */
static void free_buffer_page(struct buffer_page *bpage)
{
	free_page((unsigned long)bpage->page);
	kfree(bpage);
}

/*
 * We need to fit the time_stamp delta into 27 bits.
 */
static inline int test_time_stamp(u64 delta)
{
	if (delta & TS_DELTA_TEST)
		return 1;
	return 0;
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)

/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

int ring_buffer_print_page_header(struct trace_seq *s)
{
	struct buffer_data_page field;
	int ret;

	ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
			       "offset:0;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)sizeof(field.time_stamp),
			       (unsigned int)is_signed_type(u64));

	ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), commit),
			       (unsigned int)sizeof(field.commit),
			       (unsigned int)is_signed_type(long));

	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), commit),
			       1,
			       (unsigned int)is_signed_type(long));

	ret = trace_seq_printf(s, "\tfield: char data;\t"
			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
			       (unsigned int)offsetof(typeof(field), data),
			       (unsigned int)BUF_PAGE_SIZE,
			       (unsigned int)is_signed_type(char));

	return ret;
}
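
/*
 * head_page == tail_page && head == tail then buffer is empty.
 */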
struct ring_buffer_per_cpu {
	int				cpu;
	atomic_t			record_disabled;
	struct ring_buffer		*buffer;
	raw_spinlock_t			reader_lock;	/* serialize readers */
	arch_spinlock_t			lock;
	struct lock_class_key		lock_key;
	unsigned int			nr_pages;
	struct list_head		*pages;
	struct buffer_page		*head_page;	/* read from head */
	struct buffer_page		*tail_page;	/* write to tail */
	struct buffer_page		*commit_page;	/* committed pages */
	struct buffer_page		*reader_page;
	unsigned long			lost_events;
	unsigned long			last_overrun;
	local_t				entries_bytes;
	local_t				entries;
	local_t				overrun;
	local_t				commit_overrun;
	local_t				dropped_events;
	local_t				committing;
	local_t				commits;
	unsigned long			read;
	unsigned long			read_bytes;
	u64				write_stamp;
	u64				read_stamp;
	/* ring buffer pages to update, > 0 to add, < 0 to remove */
	int				nr_pages_to_update;
	struct list_head		new_pages; /* new pages to add */
	struct work_struct		update_pages_work;
	struct completion		update_done;
};

struct ring_buffer {
	unsigned			flags;
	int				cpus;
	atomic_t			record_disabled;
	atomic_t			resize_disabled;
	cpumask_var_t			cpumask;

	struct lock_class_key		*reader_lock_key;

	struct mutex			mutex;

	struct ring_buffer_per_cpu	**buffers;

#ifdef CONFIG_HOTPLUG_CPU
	struct notifier_block		cpu_notify;
#endif
	u64				(*clock)(void);
};

struct ring_buffer_iter {
	struct ring_buffer_per_cpu	*cpu_buffer;
	unsigned long			head;
	struct buffer_page		*head_page;
	struct buffer_page		*cache_reader_page;
	unsigned long			cache_read;
	u64				read_stamp;
};

/* buffer may be either ring_buffer or ring_buffer_per_cpu */
#define RB_WARN_ON(b, cond)						\
	({								\
		int _____ret = unlikely(cond);				\
		if (_____ret) {						\
			if (__same_type(*(b), struct ring_buffer_per_cpu)) { \
				struct ring_buffer_per_cpu *__b =	\
					(void *)b;			\
				atomic_inc(&__b->buffer->record_disabled); \
			} else						\
				atomic_inc(&b->record_disabled);	\
			WARN_ON(1);					\
		}							\
		_____ret;						\
	})

/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0

static inline u64 rb_time_stamp(struct ring_buffer *buffer)
{
	/* shift to debug/test normalization and TIME_EXTENTS */
	return buffer->clock() << DEBUG_SHIFT;
}

u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
{
	u64 time;

	preempt_disable_notrace();
	time = rb_time_stamp(buffer);
	preempt_enable_no_resched_notrace();

	return time;
}
EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);

void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
				      int cpu, u64 *ts)
{
	/* Just stupid testing the normalize function and deltas */
	*ts >>= DEBUG_SHIFT;
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
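/*
 * The head page is tracked lockless-ly by tagging the two low bits of
 * the ->next pointer in the list element of the page *before* the head
 * page (those bits are free because buffer pages are cache-line
 * aligned):
 *
 *   RB_PAGE_HEAD   - the next page is the head page
 *   RB_PAGE_UPDATE - a writer is in the process of moving the head
 *
 * Writers advance these flags with cmpxchg(), and the reader swaps its
 * own reader page in with cmpxchg() as well, so moving the head never
 * needs a lock. RB_PAGE_MOVED is only used as a return value to signal
 * that the page in question is no longer where the caller expected.
 */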
#define RB_PAGE_NORMAL		0UL
#define RB_PAGE_HEAD		1UL
#define RB_PAGE_UPDATE		2UL

#define RB_FLAG_MASK		3UL

/* PAGE_MOVED is not part of the mask */
#define RB_PAGE_MOVED		4UL

/*
 * rb_list_head - remove any flag bits from the list pointer
 */
static struct list_head *rb_list_head(struct list_head *list)
{
	unsigned long val = (unsigned long)list;

	return (struct list_head *)(val & ~RB_FLAG_MASK);
}

/*
 * rb_is_head_page - test if this page is currently the head page.
 *
 * Because the reader may move the head_page pointer, we can not
 * trust what the head page actually is; instead check the flag
 * stored in the list pointer of the page before it.
 */
static inline int
rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
		struct buffer_page *page, struct list_head *list)
{
	unsigned long val;

	val = (unsigned long)list->next;

	if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list)
		return RB_PAGE_MOVED;

	return val & RB_FLAG_MASK;
}

/*
 * rb_is_reader_page - check if the given page is the reader page.
 *
 * The unique thing about the reader page is that, once the reader
 * owns it, the page is unlinked from the ring: the previous page's
 * next pointer no longer points back to it.
 */
static int rb_is_reader_page(struct buffer_page *page)
{
	struct list_head *list = page->list.prev;

	return rb_list_head(list->next) != &page->list;
}
677
678
679
680
681static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer,
682 struct list_head *list)
683{
684 unsigned long *ptr;
685
686 ptr = (unsigned long *)&list->next;
687 *ptr |= RB_PAGE_HEAD;
688 *ptr &= ~RB_PAGE_UPDATE;
689}
690
691
692
693
694static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
695{
696 struct buffer_page *head;
697
698 head = cpu_buffer->head_page;
699 if (!head)
700 return;
701
702
703
704
705 rb_set_list_to_head(cpu_buffer, head->list.prev);
706}
707
708static void rb_list_head_clear(struct list_head *list)
709{
710 unsigned long *ptr = (unsigned long *)&list->next;
711
712 *ptr &= ~RB_FLAG_MASK;
713}
714
715
716
717
718static void
719rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
720{
721 struct list_head *hd;
722
723
724 rb_list_head_clear(cpu_buffer->pages);
725
726 list_for_each(hd, cpu_buffer->pages)
727 rb_list_head_clear(hd);
728}
729
730static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
731 struct buffer_page *head,
732 struct buffer_page *prev,
733 int old_flag, int new_flag)
734{
735 struct list_head *list;
736 unsigned long val = (unsigned long)&head->list;
737 unsigned long ret;
738
739 list = &prev->list;
740
741 val &= ~RB_FLAG_MASK;
742
743 ret = cmpxchg((unsigned long *)&list->next,
744 val | old_flag, val | new_flag);
745
746
747 if ((ret & ~RB_FLAG_MASK) != val)
748 return RB_PAGE_MOVED;
749
750 return ret & RB_FLAG_MASK;
751}
752
753static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer,
754 struct buffer_page *head,
755 struct buffer_page *prev,
756 int old_flag)
757{
758 return rb_head_page_set(cpu_buffer, head, prev,
759 old_flag, RB_PAGE_UPDATE);
760}
761
762static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer,
763 struct buffer_page *head,
764 struct buffer_page *prev,
765 int old_flag)
766{
767 return rb_head_page_set(cpu_buffer, head, prev,
768 old_flag, RB_PAGE_HEAD);
769}
770
771static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer,
772 struct buffer_page *head,
773 struct buffer_page *prev,
774 int old_flag)
775{
776 return rb_head_page_set(cpu_buffer, head, prev,
777 old_flag, RB_PAGE_NORMAL);
778}
779
780static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
781 struct buffer_page **bpage)
782{
783 struct list_head *p = rb_list_head((*bpage)->list.next);
784
785 *bpage = list_entry(p, struct buffer_page, list);
786}
787
788static struct buffer_page *
789rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
790{
791 struct buffer_page *head;
792 struct buffer_page *page;
793 struct list_head *list;
794 int i;
795
796 if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page))
797 return NULL;
798
799
800 list = cpu_buffer->pages;
801 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list))
802 return NULL;
803
804 page = head = cpu_buffer->head_page;
805
806
807
808
809
810
811 for (i = 0; i < 3; i++) {
812 do {
813 if (rb_is_head_page(cpu_buffer, page, page->list.prev)) {
814 cpu_buffer->head_page = page;
815 return page;
816 }
817 rb_inc_page(cpu_buffer, &page);
818 } while (page != head);
819 }
820
821 RB_WARN_ON(cpu_buffer, 1);
822
823 return NULL;
824}
825
826static int rb_head_page_replace(struct buffer_page *old,
827 struct buffer_page *new)
828{
829 unsigned long *ptr = (unsigned long *)&old->list.prev->next;
830 unsigned long val;
831 unsigned long ret;
832
833 val = *ptr & ~RB_FLAG_MASK;
834 val |= RB_PAGE_HEAD;
835
836 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
837
838 return ret == val;
839}
840
841
842
843
844
845
846static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
847 struct buffer_page *tail_page,
848 struct buffer_page *next_page)
849{
850 struct buffer_page *old_tail;
851 unsigned long old_entries;
852 unsigned long old_write;
853 int ret = 0;
854
855
856
857
858
859
860
861
862
863
864 old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
865 old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
866
867
868
869
870
871 barrier();
872
873
874
875
876
877
878 if (tail_page == cpu_buffer->tail_page) {
879
880 unsigned long val = old_write & ~RB_WRITE_MASK;
881 unsigned long eval = old_entries & ~RB_WRITE_MASK;
882
883
884
885
886
887
888
889
890
891
892
893 (void)local_cmpxchg(&next_page->write, old_write, val);
894 (void)local_cmpxchg(&next_page->entries, old_entries, eval);
895
896
897
898
899
900
901 local_set(&next_page->page->commit, 0);
902
903 old_tail = cmpxchg(&cpu_buffer->tail_page,
904 tail_page, next_page);
905
906 if (old_tail == tail_page)
907 ret = 1;
908 }
909
910 return ret;
911}
912
913static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
914 struct buffer_page *bpage)
915{
916 unsigned long val = (unsigned long)bpage;
917
918 if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
919 return 1;
920
921 return 0;
922}
923
924
925
926
927static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
928 struct list_head *list)
929{
930 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
931 return 1;
932 if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
933 return 1;
934 return 0;
935}
936
937
938
939
940
941
942
943
944static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
945{
946 struct list_head *head = cpu_buffer->pages;
947 struct buffer_page *bpage, *tmp;
948
949
950 if (cpu_buffer->head_page)
951 rb_set_head_page(cpu_buffer);
952
953 rb_head_page_deactivate(cpu_buffer);
954
955 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
956 return -1;
957 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
958 return -1;
959
960 if (rb_check_list(cpu_buffer, head))
961 return -1;
962
963 list_for_each_entry_safe(bpage, tmp, head, list) {
964 if (RB_WARN_ON(cpu_buffer,
965 bpage->list.next->prev != &bpage->list))
966 return -1;
967 if (RB_WARN_ON(cpu_buffer,
968 bpage->list.prev->next != &bpage->list))
969 return -1;
970 if (rb_check_list(cpu_buffer, &bpage->list))
971 return -1;
972 }
973
974 rb_head_page_activate(cpu_buffer);
975
976 return 0;
977}
978
979static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu)
980{
981 int i;
982 struct buffer_page *bpage, *tmp;
983
984 for (i = 0; i < nr_pages; i++) {
985 struct page *page;
986
987
988
989
990
991 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
992 GFP_KERNEL | __GFP_NORETRY,
993 cpu_to_node(cpu));
994 if (!bpage)
995 goto free_pages;
996
997 list_add(&bpage->list, pages);
998
999 page = alloc_pages_node(cpu_to_node(cpu),
1000 GFP_KERNEL | __GFP_NORETRY, 0);
1001 if (!page)
1002 goto free_pages;
1003 bpage->page = page_address(page);
1004 rb_init_page(bpage->page);
1005 }
1006
1007 return 0;
1008
1009free_pages:
1010 list_for_each_entry_safe(bpage, tmp, pages, list) {
1011 list_del_init(&bpage->list);
1012 free_buffer_page(bpage);
1013 }
1014
1015 return -ENOMEM;
1016}
1017
1018static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1019 unsigned nr_pages)
1020{
1021 LIST_HEAD(pages);
1022
1023 WARN_ON(!nr_pages);
1024
1025 if (__rb_allocate_pages(nr_pages, &pages, cpu_buffer->cpu))
1026 return -ENOMEM;
1027
1028
1029
1030
1031
1032
1033 cpu_buffer->pages = pages.next;
1034 list_del(&pages);
1035
1036 cpu_buffer->nr_pages = nr_pages;
1037
1038 rb_check_pages(cpu_buffer);
1039
1040 return 0;
1041}
1042
1043static struct ring_buffer_per_cpu *
1044rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
1045{
1046 struct ring_buffer_per_cpu *cpu_buffer;
1047 struct buffer_page *bpage;
1048 struct page *page;
1049 int ret;
1050
1051 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
1052 GFP_KERNEL, cpu_to_node(cpu));
1053 if (!cpu_buffer)
1054 return NULL;
1055
1056 cpu_buffer->cpu = cpu;
1057 cpu_buffer->buffer = buffer;
1058 raw_spin_lock_init(&cpu_buffer->reader_lock);
1059 lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
1060 cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
1061 INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
1062 init_completion(&cpu_buffer->update_done);
1063
1064 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1065 GFP_KERNEL, cpu_to_node(cpu));
1066 if (!bpage)
1067 goto fail_free_buffer;
1068
1069 rb_check_bpage(cpu_buffer, bpage);
1070
1071 cpu_buffer->reader_page = bpage;
1072 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1073 if (!page)
1074 goto fail_free_reader;
1075 bpage->page = page_address(page);
1076 rb_init_page(bpage->page);
1077
1078 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1079 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1080
1081 ret = rb_allocate_pages(cpu_buffer, nr_pages);
1082 if (ret < 0)
1083 goto fail_free_reader;
1084
1085 cpu_buffer->head_page
1086 = list_entry(cpu_buffer->pages, struct buffer_page, list);
1087 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
1088
1089 rb_head_page_activate(cpu_buffer);
1090
1091 return cpu_buffer;
1092
1093 fail_free_reader:
1094 free_buffer_page(cpu_buffer->reader_page);
1095
1096 fail_free_buffer:
1097 kfree(cpu_buffer);
1098 return NULL;
1099}
1100
1101static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
1102{
1103 struct list_head *head = cpu_buffer->pages;
1104 struct buffer_page *bpage, *tmp;
1105
1106 free_buffer_page(cpu_buffer->reader_page);
1107
1108 rb_head_page_deactivate(cpu_buffer);
1109
1110 if (head) {
1111 list_for_each_entry_safe(bpage, tmp, head, list) {
1112 list_del_init(&bpage->list);
1113 free_buffer_page(bpage);
1114 }
1115 bpage = list_entry(head, struct buffer_page, list);
1116 free_buffer_page(bpage);
1117 }
1118
1119 kfree(cpu_buffer);
1120}
1121
1122#ifdef CONFIG_HOTPLUG_CPU
1123static int rb_cpu_notify(struct notifier_block *self,
1124 unsigned long action, void *hcpu);
1125#endif
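
/**
 * __ring_buffer_alloc - allocate a new ring_buffer
 * @size: the size in bytes per cpu that is needed.
 * @flags: attributes to set for the ring buffer.
 *
 * Currently the only flag that is available is the RB_FL_OVERWRITE
 * flag. This flag means that the buffer will overwrite old data
 * when the buffer wraps. If this flag is not set, the buffer will
 * drop data when the tail hits the head.
 *
 * A minimal allocation sketch (the lock_class_key handling is normally
 * hidden behind the ring_buffer_alloc() wrapper in linux/ring_buffer.h):
 *
 *	struct ring_buffer *rb;
 *
 *	rb = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
 *	if (!rb)
 *		return -ENOMEM;
 */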
1137struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
1138 struct lock_class_key *key)
1139{
1140 struct ring_buffer *buffer;
1141 int bsize;
1142 int cpu, nr_pages;
1143
1144
1145 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
1146 GFP_KERNEL);
1147 if (!buffer)
1148 return NULL;
1149
1150 if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
1151 goto fail_free_buffer;
1152
1153 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1154 buffer->flags = flags;
1155 buffer->clock = trace_clock_local;
1156 buffer->reader_lock_key = key;
1157
1158
1159 if (nr_pages < 2)
1160 nr_pages = 2;
1161
1162
1163
1164
1165
1166
1167#ifdef CONFIG_HOTPLUG_CPU
1168 get_online_cpus();
1169 cpumask_copy(buffer->cpumask, cpu_online_mask);
1170#else
1171 cpumask_copy(buffer->cpumask, cpu_possible_mask);
1172#endif
1173 buffer->cpus = nr_cpu_ids;
1174
1175 bsize = sizeof(void *) * nr_cpu_ids;
1176 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
1177 GFP_KERNEL);
1178 if (!buffer->buffers)
1179 goto fail_free_cpumask;
1180
1181 for_each_buffer_cpu(buffer, cpu) {
1182 buffer->buffers[cpu] =
1183 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
1184 if (!buffer->buffers[cpu])
1185 goto fail_free_buffers;
1186 }
1187
1188#ifdef CONFIG_HOTPLUG_CPU
1189 buffer->cpu_notify.notifier_call = rb_cpu_notify;
1190 buffer->cpu_notify.priority = 0;
1191 register_cpu_notifier(&buffer->cpu_notify);
1192#endif
1193
1194 put_online_cpus();
1195 mutex_init(&buffer->mutex);
1196
1197 return buffer;
1198
1199 fail_free_buffers:
1200 for_each_buffer_cpu(buffer, cpu) {
1201 if (buffer->buffers[cpu])
1202 rb_free_cpu_buffer(buffer->buffers[cpu]);
1203 }
1204 kfree(buffer->buffers);
1205
1206 fail_free_cpumask:
1207 free_cpumask_var(buffer->cpumask);
1208 put_online_cpus();
1209
1210 fail_free_buffer:
1211 kfree(buffer);
1212 return NULL;
1213}
1214EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
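
/**
 * ring_buffer_free - free a ring buffer.
 * @buffer: the buffer to free.
 */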
void
ring_buffer_free(struct ring_buffer *buffer)
{
	int cpu;

	get_online_cpus();

#ifdef CONFIG_HOTPLUG_CPU
	unregister_cpu_notifier(&buffer->cpu_notify);
#endif

	for_each_buffer_cpu(buffer, cpu)
		rb_free_cpu_buffer(buffer->buffers[cpu]);

	put_online_cpus();

	kfree(buffer->buffers);
	free_cpumask_var(buffer->cpumask);

	kfree(buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_free);
1242
1243void ring_buffer_set_clock(struct ring_buffer *buffer,
1244 u64 (*clock)(void))
1245{
1246 buffer->clock = clock;
1247}
1248
1249static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
1250
1251static inline unsigned long rb_page_entries(struct buffer_page *bpage)
1252{
1253 return local_read(&bpage->entries) & RB_WRITE_MASK;
1254}
1255
1256static inline unsigned long rb_page_write(struct buffer_page *bpage)
1257{
1258 return local_read(&bpage->write) & RB_WRITE_MASK;
1259}
1260
1261static int
1262rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages)
1263{
1264 struct list_head *tail_page, *to_remove, *next_page;
1265 struct buffer_page *to_remove_page, *tmp_iter_page;
1266 struct buffer_page *last_page, *first_page;
1267 unsigned int nr_removed;
1268 unsigned long head_bit;
1269 int page_entries;
1270
1271 head_bit = 0;
1272
1273 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1274 atomic_inc(&cpu_buffer->record_disabled);
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284 tail_page = &cpu_buffer->tail_page->list;
1285
1286
1287
1288
1289
1290 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
1291 tail_page = rb_list_head(tail_page->next);
1292 to_remove = tail_page;
1293
1294
1295 first_page = list_entry(rb_list_head(to_remove->next),
1296 struct buffer_page, list);
1297
1298 for (nr_removed = 0; nr_removed < nr_pages; nr_removed++) {
1299 to_remove = rb_list_head(to_remove)->next;
1300 head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
1301 }
1302
1303 next_page = rb_list_head(to_remove)->next;
1304
1305
1306
1307
1308
1309
1310 tail_page->next = (struct list_head *)((unsigned long)next_page |
1311 head_bit);
1312 next_page = rb_list_head(next_page);
1313 next_page->prev = tail_page;
1314
1315
1316 cpu_buffer->pages = next_page;
1317
1318
1319 if (head_bit)
1320 cpu_buffer->head_page = list_entry(next_page,
1321 struct buffer_page, list);
1322
1323
1324
1325
1326
1327 cpu_buffer->read = 0;
1328
1329
1330 atomic_dec(&cpu_buffer->record_disabled);
1331 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1332
1333 RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages));
1334
1335
1336 last_page = list_entry(rb_list_head(to_remove), struct buffer_page,
1337 list);
1338 tmp_iter_page = first_page;
1339
1340 do {
1341 to_remove_page = tmp_iter_page;
1342 rb_inc_page(cpu_buffer, &tmp_iter_page);
1343
1344
1345 page_entries = rb_page_entries(to_remove_page);
1346 if (page_entries) {
1347
1348
1349
1350
1351
1352
1353 local_add(page_entries, &cpu_buffer->overrun);
1354 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1355 }
1356
1357
1358
1359
1360
1361 free_buffer_page(to_remove_page);
1362 nr_removed--;
1363
1364 } while (to_remove_page != last_page);
1365
1366 RB_WARN_ON(cpu_buffer, nr_removed);
1367
1368 return nr_removed == 0;
1369}
1370
1371static int
1372rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
1373{
1374 struct list_head *pages = &cpu_buffer->new_pages;
1375 int retries, success;
1376
1377 raw_spin_lock_irq(&cpu_buffer->reader_lock);
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392 retries = 10;
1393 success = 0;
1394 while (retries--) {
1395 struct list_head *head_page, *prev_page, *r;
1396 struct list_head *last_page, *first_page;
1397 struct list_head *head_page_with_bit;
1398
1399 head_page = &rb_set_head_page(cpu_buffer)->list;
1400 if (!head_page)
1401 break;
1402 prev_page = head_page->prev;
1403
1404 first_page = pages->next;
1405 last_page = pages->prev;
1406
1407 head_page_with_bit = (struct list_head *)
1408 ((unsigned long)head_page | RB_PAGE_HEAD);
1409
1410 last_page->next = head_page_with_bit;
1411 first_page->prev = prev_page;
1412
1413 r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
1414
1415 if (r == head_page_with_bit) {
1416
1417
1418
1419
1420
1421 head_page->prev = last_page;
1422 success = 1;
1423 break;
1424 }
1425 }
1426
1427 if (success)
1428 INIT_LIST_HEAD(pages);
1429
1430
1431
1432
1433 RB_WARN_ON(cpu_buffer, !success);
1434 raw_spin_unlock_irq(&cpu_buffer->reader_lock);
1435
1436
1437 if (!success) {
1438 struct buffer_page *bpage, *tmp;
1439 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1440 list) {
1441 list_del_init(&bpage->list);
1442 free_buffer_page(bpage);
1443 }
1444 }
1445 return success;
1446}
1447
1448static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
1449{
1450 int success;
1451
1452 if (cpu_buffer->nr_pages_to_update > 0)
1453 success = rb_insert_pages(cpu_buffer);
1454 else
1455 success = rb_remove_pages(cpu_buffer,
1456 -cpu_buffer->nr_pages_to_update);
1457
1458 if (success)
1459 cpu_buffer->nr_pages += cpu_buffer->nr_pages_to_update;
1460}
1461
1462static void update_pages_handler(struct work_struct *work)
1463{
1464 struct ring_buffer_per_cpu *cpu_buffer = container_of(work,
1465 struct ring_buffer_per_cpu, update_pages_work);
1466 rb_update_pages(cpu_buffer);
1467 complete(&cpu_buffer->update_done);
1468}
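
/**
 * ring_buffer_resize - resize the ring buffer
 * @buffer: the buffer to resize.
 * @size: the new size in bytes.
 * @cpu_id: the cpu buffer to resize, or RING_BUFFER_ALL_CPUS for all.
 *
 * Minimum size is 2 * BUF_PAGE_SIZE.
 *
 * Returns size on success and < 0 on failure.
 */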
1479int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
1480 int cpu_id)
1481{
1482 struct ring_buffer_per_cpu *cpu_buffer;
1483 unsigned nr_pages;
1484 int cpu, err = 0;
1485
1486
1487
1488
1489 if (!buffer)
1490 return size;
1491
1492
1493 if (cpu_id != RING_BUFFER_ALL_CPUS &&
1494 !cpumask_test_cpu(cpu_id, buffer->cpumask))
1495 return size;
1496
1497 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1498 size *= BUF_PAGE_SIZE;
1499
1500
1501 if (size < BUF_PAGE_SIZE * 2)
1502 size = BUF_PAGE_SIZE * 2;
1503
1504 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
1505
1506
1507
1508
1509
1510
1511 if (atomic_read(&buffer->resize_disabled))
1512 return -EBUSY;
1513
1514
1515 mutex_lock(&buffer->mutex);
1516
1517 if (cpu_id == RING_BUFFER_ALL_CPUS) {
1518
1519 for_each_buffer_cpu(buffer, cpu) {
1520 cpu_buffer = buffer->buffers[cpu];
1521
1522 cpu_buffer->nr_pages_to_update = nr_pages -
1523 cpu_buffer->nr_pages;
1524
1525
1526
1527 if (cpu_buffer->nr_pages_to_update <= 0)
1528 continue;
1529
1530
1531
1532
1533 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1534 if (__rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1535 &cpu_buffer->new_pages, cpu)) {
1536
1537 err = -ENOMEM;
1538 goto out_err;
1539 }
1540 }
1541
1542 get_online_cpus();
1543
1544
1545
1546
1547
1548 for_each_buffer_cpu(buffer, cpu) {
1549 cpu_buffer = buffer->buffers[cpu];
1550 if (!cpu_buffer->nr_pages_to_update)
1551 continue;
1552
1553 if (cpu_online(cpu))
1554 schedule_work_on(cpu,
1555 &cpu_buffer->update_pages_work);
1556 else
1557 rb_update_pages(cpu_buffer);
1558 }
1559
1560
1561 for_each_buffer_cpu(buffer, cpu) {
1562 cpu_buffer = buffer->buffers[cpu];
1563 if (!cpu_buffer->nr_pages_to_update)
1564 continue;
1565
1566 if (cpu_online(cpu))
1567 wait_for_completion(&cpu_buffer->update_done);
1568 cpu_buffer->nr_pages_to_update = 0;
1569 }
1570
1571 put_online_cpus();
1572 } else {
1573
1574 if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
1575 goto out;
1576
1577 cpu_buffer = buffer->buffers[cpu_id];
1578
1579 if (nr_pages == cpu_buffer->nr_pages)
1580 goto out;
1581
1582 cpu_buffer->nr_pages_to_update = nr_pages -
1583 cpu_buffer->nr_pages;
1584
1585 INIT_LIST_HEAD(&cpu_buffer->new_pages);
1586 if (cpu_buffer->nr_pages_to_update > 0 &&
1587 __rb_allocate_pages(cpu_buffer->nr_pages_to_update,
1588 &cpu_buffer->new_pages, cpu_id)) {
1589 err = -ENOMEM;
1590 goto out_err;
1591 }
1592
1593 get_online_cpus();
1594
1595 if (cpu_online(cpu_id)) {
1596 schedule_work_on(cpu_id,
1597 &cpu_buffer->update_pages_work);
1598 wait_for_completion(&cpu_buffer->update_done);
1599 } else
1600 rb_update_pages(cpu_buffer);
1601
1602 cpu_buffer->nr_pages_to_update = 0;
1603 put_online_cpus();
1604 }
1605
1606 out:
1607
1608
1609
1610
1611
1612
1613
1614 if (atomic_read(&buffer->record_disabled)) {
1615 atomic_inc(&buffer->record_disabled);
1616
1617
1618
1619
1620
1621
1622 synchronize_sched();
1623 for_each_buffer_cpu(buffer, cpu) {
1624 cpu_buffer = buffer->buffers[cpu];
1625 rb_check_pages(cpu_buffer);
1626 }
1627 atomic_dec(&buffer->record_disabled);
1628 }
1629
1630 mutex_unlock(&buffer->mutex);
1631 return size;
1632
1633 out_err:
1634 for_each_buffer_cpu(buffer, cpu) {
1635 struct buffer_page *bpage, *tmp;
1636
1637 cpu_buffer = buffer->buffers[cpu];
1638 cpu_buffer->nr_pages_to_update = 0;
1639
1640 if (list_empty(&cpu_buffer->new_pages))
1641 continue;
1642
1643 list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages,
1644 list) {
1645 list_del_init(&bpage->list);
1646 free_buffer_page(bpage);
1647 }
1648 }
1649 mutex_unlock(&buffer->mutex);
1650 return err;
1651}
1652EXPORT_SYMBOL_GPL(ring_buffer_resize);
1653
1654void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1655{
1656 mutex_lock(&buffer->mutex);
1657 if (val)
1658 buffer->flags |= RB_FL_OVERWRITE;
1659 else
1660 buffer->flags &= ~RB_FL_OVERWRITE;
1661 mutex_unlock(&buffer->mutex);
1662}
1663EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1664
1665static inline void *
1666__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1667{
1668 return bpage->data + index;
1669}
1670
1671static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1672{
1673 return bpage->page->data + index;
1674}
1675
1676static inline struct ring_buffer_event *
1677rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1678{
1679 return __rb_page_index(cpu_buffer->reader_page,
1680 cpu_buffer->reader_page->read);
1681}
1682
1683static inline struct ring_buffer_event *
1684rb_iter_head_event(struct ring_buffer_iter *iter)
1685{
1686 return __rb_page_index(iter->head_page, iter->head);
1687}
1688
1689static inline unsigned rb_page_commit(struct buffer_page *bpage)
1690{
1691 return local_read(&bpage->page->commit);
1692}
1693
1694
1695static inline unsigned rb_page_size(struct buffer_page *bpage)
1696{
1697 return rb_page_commit(bpage);
1698}
1699
1700static inline unsigned
1701rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
1702{
1703 return rb_page_commit(cpu_buffer->commit_page);
1704}
1705
1706static inline unsigned
1707rb_event_index(struct ring_buffer_event *event)
1708{
1709 unsigned long addr = (unsigned long)event;
1710
1711 return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
1712}
1713
1714static inline int
1715rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
1716 struct ring_buffer_event *event)
1717{
1718 unsigned long addr = (unsigned long)event;
1719 unsigned long index;
1720
1721 index = rb_event_index(event);
1722 addr &= PAGE_MASK;
1723
1724 return cpu_buffer->commit_page->page == (void *)addr &&
1725 rb_commit_index(cpu_buffer) == index;
1726}
1727
1728static void
1729rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
1730{
1731 unsigned long max_count;
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741 again:
1742 max_count = cpu_buffer->nr_pages * 100;
1743
1744 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
1745 if (RB_WARN_ON(cpu_buffer, !(--max_count)))
1746 return;
1747 if (RB_WARN_ON(cpu_buffer,
1748 rb_is_reader_page(cpu_buffer->tail_page)))
1749 return;
1750 local_set(&cpu_buffer->commit_page->page->commit,
1751 rb_page_write(cpu_buffer->commit_page));
1752 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
1753 cpu_buffer->write_stamp =
1754 cpu_buffer->commit_page->page->time_stamp;
1755
1756 barrier();
1757 }
1758 while (rb_commit_index(cpu_buffer) !=
1759 rb_page_write(cpu_buffer->commit_page)) {
1760
1761 local_set(&cpu_buffer->commit_page->page->commit,
1762 rb_page_write(cpu_buffer->commit_page));
1763 RB_WARN_ON(cpu_buffer,
1764 local_read(&cpu_buffer->commit_page->page->commit) &
1765 ~RB_WRITE_MASK);
1766 barrier();
1767 }
1768
1769
1770 barrier();
1771
1772
1773
1774
1775
1776
1777 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
1778 goto again;
1779}
1780
1781static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1782{
1783 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
1784 cpu_buffer->reader_page->read = 0;
1785}
1786
1787static void rb_inc_iter(struct ring_buffer_iter *iter)
1788{
1789 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1790
1791
1792
1793
1794
1795
1796
1797 if (iter->head_page == cpu_buffer->reader_page)
1798 iter->head_page = rb_set_head_page(cpu_buffer);
1799 else
1800 rb_inc_page(cpu_buffer, &iter->head_page);
1801
1802 iter->read_stamp = iter->head_page->page->time_stamp;
1803 iter->head = 0;
1804}
1805
1806
1807static noinline struct ring_buffer_event *
1808rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1809{
1810 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1811
1812
1813 if (rb_event_index(event)) {
1814 event->time_delta = delta & TS_MASK;
1815 event->array[0] = delta >> TS_SHIFT;
1816 } else {
1817
1818 event->time_delta = 0;
1819 event->array[0] = 0;
1820 }
1821
1822 return skip_time_extend(event);
1823}
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836static void
1837rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1838 struct ring_buffer_event *event, unsigned length,
1839 int add_timestamp, u64 delta)
1840{
1841
1842 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1843 delta = 0;
1844
1845
1846
1847
1848
1849 if (unlikely(add_timestamp)) {
1850 event = rb_add_time_stamp(event, delta);
1851 length -= RB_LEN_TIME_EXTEND;
1852 delta = 0;
1853 }
1854
1855 event->time_delta = delta;
1856 length -= RB_EVNT_HDR_SIZE;
1857 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1858 event->type_len = 0;
1859 event->array[0] = length;
1860 } else
1861 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1862}
1863
1864
1865
1866
1867
1868
1869
1870
1871static int
1872rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
1873 struct buffer_page *tail_page,
1874 struct buffer_page *next_page)
1875{
1876 struct buffer_page *new_head;
1877 int entries;
1878 int type;
1879 int ret;
1880
1881 entries = rb_page_entries(next_page);
1882
1883
1884
1885
1886
1887
1888 type = rb_head_page_set_update(cpu_buffer, next_page, tail_page,
1889 RB_PAGE_HEAD);
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902 switch (type) {
1903 case RB_PAGE_HEAD:
1904
1905
1906
1907
1908
1909 local_add(entries, &cpu_buffer->overrun);
1910 local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
1911
1912
1913
1914
1915
1916
1917
1918 break;
1919
1920 case RB_PAGE_UPDATE:
1921
1922
1923
1924
1925 break;
1926 case RB_PAGE_NORMAL:
1927
1928
1929
1930
1931
1932 return 1;
1933 case RB_PAGE_MOVED:
1934
1935
1936
1937
1938
1939 return 1;
1940 default:
1941 RB_WARN_ON(cpu_buffer, 1);
1942 return -1;
1943 }
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959 new_head = next_page;
1960 rb_inc_page(cpu_buffer, &new_head);
1961
1962 ret = rb_head_page_set_head(cpu_buffer, new_head, next_page,
1963 RB_PAGE_NORMAL);
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973 switch (ret) {
1974 case RB_PAGE_HEAD:
1975 case RB_PAGE_NORMAL:
1976
1977 break;
1978 default:
1979 RB_WARN_ON(cpu_buffer, 1);
1980 return -1;
1981 }
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993 if (ret == RB_PAGE_NORMAL) {
1994
1995
1996
1997
1998 if (cpu_buffer->tail_page != tail_page &&
1999 cpu_buffer->tail_page != next_page)
2000 rb_head_page_set_normal(cpu_buffer, new_head,
2001 next_page,
2002 RB_PAGE_HEAD);
2003 }
2004
2005
2006
2007
2008
2009
2010 if (type == RB_PAGE_HEAD) {
2011 ret = rb_head_page_set_normal(cpu_buffer, next_page,
2012 tail_page,
2013 RB_PAGE_UPDATE);
2014 if (RB_WARN_ON(cpu_buffer,
2015 ret != RB_PAGE_UPDATE))
2016 return -1;
2017 }
2018
2019 return 0;
2020}
2021
2022static unsigned rb_calculate_event_length(unsigned length)
2023{
2024 struct ring_buffer_event event;
2025
2026
2027 if (!length)
2028 length = 1;
2029
2030 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
2031 length += sizeof(event.array[0]);
2032
2033 length += RB_EVNT_HDR_SIZE;
2034 length = ALIGN(length, RB_ARCH_ALIGNMENT);
2035
2036 return length;
2037}
2038
2039static inline void
2040rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
2041 struct buffer_page *tail_page,
2042 unsigned long tail, unsigned long length)
2043{
2044 struct ring_buffer_event *event;
2045
2046
2047
2048
2049
2050 if (tail >= BUF_PAGE_SIZE) {
2051
2052
2053
2054
2055
2056 if (tail == BUF_PAGE_SIZE)
2057 tail_page->real_end = 0;
2058
2059 local_sub(length, &tail_page->write);
2060 return;
2061 }
2062
2063 event = __rb_page_index(tail_page, tail);
2064 kmemcheck_annotate_bitfield(event, bitfield);
2065
2066
2067 local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
2068
2069
2070
2071
2072
2073
2074 tail_page->real_end = tail;
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087 if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
2088
2089
2090
2091 rb_event_set_padding(event);
2092
2093
2094 local_sub(length, &tail_page->write);
2095 return;
2096 }
2097
2098
2099 event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
2100 event->type_len = RINGBUF_TYPE_PADDING;
2101
2102 event->time_delta = 1;
2103
2104
2105 length = (tail + length) - BUF_PAGE_SIZE;
2106 local_sub(length, &tail_page->write);
2107}
2108
2109
2110
2111
2112static noinline struct ring_buffer_event *
2113rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
2114 unsigned long length, unsigned long tail,
2115 struct buffer_page *tail_page, u64 ts)
2116{
2117 struct buffer_page *commit_page = cpu_buffer->commit_page;
2118 struct ring_buffer *buffer = cpu_buffer->buffer;
2119 struct buffer_page *next_page;
2120 int ret;
2121
2122 next_page = tail_page;
2123
2124 rb_inc_page(cpu_buffer, &next_page);
2125
2126
2127
2128
2129
2130
2131 if (unlikely(next_page == commit_page)) {
2132 local_inc(&cpu_buffer->commit_overrun);
2133 goto out_reset;
2134 }
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150 if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) {
2151
2152
2153
2154
2155
2156 if (!rb_is_reader_page(cpu_buffer->commit_page)) {
2157
2158
2159
2160
2161 if (!(buffer->flags & RB_FL_OVERWRITE)) {
2162 local_inc(&cpu_buffer->dropped_events);
2163 goto out_reset;
2164 }
2165
2166 ret = rb_handle_head_page(cpu_buffer,
2167 tail_page,
2168 next_page);
2169 if (ret < 0)
2170 goto out_reset;
2171 if (ret)
2172 goto out_again;
2173 } else {
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184 if (unlikely((cpu_buffer->commit_page !=
2185 cpu_buffer->tail_page) &&
2186 (cpu_buffer->commit_page ==
2187 cpu_buffer->reader_page))) {
2188 local_inc(&cpu_buffer->commit_overrun);
2189 goto out_reset;
2190 }
2191 }
2192 }
2193
2194 ret = rb_tail_page_update(cpu_buffer, tail_page, next_page);
2195 if (ret) {
2196
2197
2198
2199
2200 ts = rb_time_stamp(buffer);
2201 next_page->page->time_stamp = ts;
2202 }
2203
2204 out_again:
2205
2206 rb_reset_tail(cpu_buffer, tail_page, tail, length);
2207
2208
2209 return ERR_PTR(-EAGAIN);
2210
2211 out_reset:
2212
2213 rb_reset_tail(cpu_buffer, tail_page, tail, length);
2214
2215 return NULL;
2216}
2217
2218static struct ring_buffer_event *
2219__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
2220 unsigned long length, u64 ts,
2221 u64 delta, int add_timestamp)
2222{
2223 struct buffer_page *tail_page;
2224 struct ring_buffer_event *event;
2225 unsigned long tail, write;
2226
2227
2228
2229
2230
2231
2232 if (unlikely(add_timestamp))
2233 length += RB_LEN_TIME_EXTEND;
2234
2235 tail_page = cpu_buffer->tail_page;
2236 write = local_add_return(length, &tail_page->write);
2237
2238
2239 write &= RB_WRITE_MASK;
2240 tail = write - length;
2241
2242
2243 if (unlikely(write > BUF_PAGE_SIZE))
2244 return rb_move_tail(cpu_buffer, length, tail,
2245 tail_page, ts);
2246
2247
2248
2249 event = __rb_page_index(tail_page, tail);
2250 kmemcheck_annotate_bitfield(event, bitfield);
2251 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
2252
2253 local_inc(&tail_page->entries);
2254
2255
2256
2257
2258
2259 if (!tail)
2260 tail_page->page->time_stamp = ts;
2261
2262
2263 local_add(length, &cpu_buffer->entries_bytes);
2264
2265 return event;
2266}
2267
2268static inline int
2269rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2270 struct ring_buffer_event *event)
2271{
2272 unsigned long new_index, old_index;
2273 struct buffer_page *bpage;
2274 unsigned long index;
2275 unsigned long addr;
2276
2277 new_index = rb_event_index(event);
2278 old_index = new_index + rb_event_ts_length(event);
2279 addr = (unsigned long)event;
2280 addr &= PAGE_MASK;
2281
2282 bpage = cpu_buffer->tail_page;
2283
2284 if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
2285 unsigned long write_mask =
2286 local_read(&bpage->write) & ~RB_WRITE_MASK;
2287 unsigned long event_length = rb_event_length(event);
2288
2289
2290
2291
2292
2293
2294 old_index += write_mask;
2295 new_index += write_mask;
2296 index = local_cmpxchg(&bpage->write, old_index, new_index);
2297 if (index == old_index) {
2298
2299 local_sub(event_length, &cpu_buffer->entries_bytes);
2300 return 1;
2301 }
2302 }
2303
2304
2305 return 0;
2306}
2307
2308static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2309{
2310 local_inc(&cpu_buffer->committing);
2311 local_inc(&cpu_buffer->commits);
2312}
2313
2314static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2315{
2316 unsigned long commits;
2317
2318 if (RB_WARN_ON(cpu_buffer,
2319 !local_read(&cpu_buffer->committing)))
2320 return;
2321
2322 again:
2323 commits = local_read(&cpu_buffer->commits);
2324
2325 barrier();
2326 if (local_read(&cpu_buffer->committing) == 1)
2327 rb_set_commit_to_write(cpu_buffer);
2328
2329 local_dec(&cpu_buffer->committing);
2330
2331
2332 barrier();
2333
2334
2335
2336
2337
2338
2339 if (unlikely(local_read(&cpu_buffer->commits) != commits) &&
2340 !local_read(&cpu_buffer->committing)) {
2341 local_inc(&cpu_buffer->committing);
2342 goto again;
2343 }
2344}
2345
2346static struct ring_buffer_event *
2347rb_reserve_next_event(struct ring_buffer *buffer,
2348 struct ring_buffer_per_cpu *cpu_buffer,
2349 unsigned long length)
2350{
2351 struct ring_buffer_event *event;
2352 u64 ts, delta;
2353 int nr_loops = 0;
2354 int add_timestamp;
2355 u64 diff;
2356
2357 rb_start_commit(cpu_buffer);
2358
2359#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2360
2361
2362
2363
2364
2365
2366 barrier();
2367 if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
2368 local_dec(&cpu_buffer->committing);
2369 local_dec(&cpu_buffer->commits);
2370 return NULL;
2371 }
2372#endif
2373
2374 length = rb_calculate_event_length(length);
2375 again:
2376 add_timestamp = 0;
2377 delta = 0;
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
2389 goto out_fail;
2390
2391 ts = rb_time_stamp(cpu_buffer->buffer);
2392 diff = ts - cpu_buffer->write_stamp;
2393
2394
2395 barrier();
2396
2397
2398 if (likely(ts >= cpu_buffer->write_stamp)) {
2399 delta = diff;
2400 if (unlikely(test_time_stamp(delta))) {
2401 int local_clock_stable = 1;
2402#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2403 local_clock_stable = sched_clock_stable;
2404#endif
2405 WARN_ONCE(delta > (1ULL << 59),
2406 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2407 (unsigned long long)delta,
2408 (unsigned long long)ts,
2409 (unsigned long long)cpu_buffer->write_stamp,
2410 local_clock_stable ? "" :
2411 "If you just came from a suspend/resume,\n"
2412 "please switch to the trace global clock:\n"
2413 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2414 add_timestamp = 1;
2415 }
2416 }
2417
2418 event = __rb_reserve_next(cpu_buffer, length, ts,
2419 delta, add_timestamp);
2420 if (unlikely(PTR_ERR(event) == -EAGAIN))
2421 goto again;
2422
2423 if (!event)
2424 goto out_fail;
2425
2426 return event;
2427
2428 out_fail:
2429 rb_end_commit(cpu_buffer);
2430 return NULL;
2431}
2432
2433#ifdef CONFIG_TRACING
2434
2435#define TRACE_RECURSIVE_DEPTH 16
2436
2437
2438static noinline void trace_recursive_fail(void)
2439{
2440
2441 tracing_off_permanent();
2442
2443 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
2444 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
2445 trace_recursion_buffer(),
2446 hardirq_count() >> HARDIRQ_SHIFT,
2447 softirq_count() >> SOFTIRQ_SHIFT,
2448 in_nmi());
2449
2450 WARN_ON_ONCE(1);
2451}
2452
2453static inline int trace_recursive_lock(void)
2454{
2455 trace_recursion_inc();
2456
2457 if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
2458 return 0;
2459
2460 trace_recursive_fail();
2461
2462 return -1;
2463}
2464
2465static inline void trace_recursive_unlock(void)
2466{
2467 WARN_ON_ONCE(!trace_recursion_buffer());
2468
2469 trace_recursion_dec();
2470}
2471
2472#else
2473
2474#define trace_recursive_lock() (0)
2475#define trace_recursive_unlock() do { } while (0)
2476
2477#endif
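
/**
 * ring_buffer_lock_reserve - reserve a part of the buffer
 * @buffer: the ring buffer to reserve from
 * @length: the length of the data to reserve (excluding event header)
 *
 * Returns a reserved event on the ring buffer to copy directly to.
 * The user of this interface will need to get the body to write into
 * and can use the ring_buffer_event_data() interface.
 *
 * The length is the length of the data needed, not the event length
 * which also includes the event header.
 *
 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
 *
 * A minimal usage sketch ("struct my_entry" and its field are only
 * illustrative, not part of this API):
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(struct my_entry));
 *	if (event) {
 *		struct my_entry *entry = ring_buffer_event_data(event);
 *		entry->value = 42;
 *		ring_buffer_unlock_commit(buffer, event);
 *	}
 */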
2494struct ring_buffer_event *
2495ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2496{
2497 struct ring_buffer_per_cpu *cpu_buffer;
2498 struct ring_buffer_event *event;
2499 int cpu;
2500
2501 if (ring_buffer_flags != RB_BUFFERS_ON)
2502 return NULL;
2503
2504
2505 preempt_disable_notrace();
2506
2507 if (atomic_read(&buffer->record_disabled))
2508 goto out_nocheck;
2509
2510 if (trace_recursive_lock())
2511 goto out_nocheck;
2512
2513 cpu = raw_smp_processor_id();
2514
2515 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2516 goto out;
2517
2518 cpu_buffer = buffer->buffers[cpu];
2519
2520 if (atomic_read(&cpu_buffer->record_disabled))
2521 goto out;
2522
2523 if (length > BUF_MAX_DATA_SIZE)
2524 goto out;
2525
2526 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2527 if (!event)
2528 goto out;
2529
2530 return event;
2531
2532 out:
2533 trace_recursive_unlock();
2534
2535 out_nocheck:
2536 preempt_enable_notrace();
2537 return NULL;
2538}
2539EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
2540
2541static void
2542rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2543 struct ring_buffer_event *event)
2544{
2545 u64 delta;
2546
2547
2548
2549
2550
2551 if (rb_event_is_commit(cpu_buffer, event)) {
2552
2553
2554
2555
2556 if (!rb_event_index(event))
2557 cpu_buffer->write_stamp =
2558 cpu_buffer->commit_page->page->time_stamp;
2559 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2560 delta = event->array[0];
2561 delta <<= TS_SHIFT;
2562 delta += event->time_delta;
2563 cpu_buffer->write_stamp += delta;
2564 } else
2565 cpu_buffer->write_stamp += event->time_delta;
2566 }
2567}
2568
2569static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
2570 struct ring_buffer_event *event)
2571{
2572 local_inc(&cpu_buffer->entries);
2573 rb_update_write_stamp(cpu_buffer, event);
2574 rb_end_commit(cpu_buffer);
2575}
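
/**
 * ring_buffer_unlock_commit - commit a reserved event
 * @buffer: The buffer to commit to
 * @event: The event pointer to commit.
 *
 * This commits the data to the ring buffer, and releases any locks held.
 *
 * Must be paired with ring_buffer_lock_reserve.
 */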
2586int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2587 struct ring_buffer_event *event)
2588{
2589 struct ring_buffer_per_cpu *cpu_buffer;
2590 int cpu = raw_smp_processor_id();
2591
2592 cpu_buffer = buffer->buffers[cpu];
2593
2594 rb_commit(cpu_buffer, event);
2595
2596 trace_recursive_unlock();
2597
2598 preempt_enable_notrace();
2599
2600 return 0;
2601}
2602EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2603
2604static inline void rb_event_discard(struct ring_buffer_event *event)
2605{
2606 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2607 event = skip_time_extend(event);
2608
2609
2610 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2611 event->type_len = RINGBUF_TYPE_PADDING;
2612
2613 if (!event->time_delta)
2614 event->time_delta = 1;
2615}
2616
2617
2618
2619
2620
2621
2622
2623static inline void
2624rb_decrement_entry(struct ring_buffer_per_cpu *cpu_buffer,
2625 struct ring_buffer_event *event)
2626{
2627 unsigned long addr = (unsigned long)event;
2628 struct buffer_page *bpage = cpu_buffer->commit_page;
2629 struct buffer_page *start;
2630
2631 addr &= PAGE_MASK;
2632
2633
2634 if (likely(bpage->page == (void *)addr)) {
2635 local_dec(&bpage->entries);
2636 return;
2637 }
2638
2639
2640
2641
2642
2643 rb_inc_page(cpu_buffer, &bpage);
2644 start = bpage;
2645 do {
2646 if (bpage->page == (void *)addr) {
2647 local_dec(&bpage->entries);
2648 return;
2649 }
2650 rb_inc_page(cpu_buffer, &bpage);
2651 } while (bpage != start);
2652
2653
2654 RB_WARN_ON(cpu_buffer, 1);
2655}
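
/**
 * ring_buffer_discard_commit - discard an event that has not been committed
 * @buffer: the ring buffer
 * @event: non committed event to discard
 *
 * Sometimes an event that is in the ring buffer needs to be ignored.
 * This function lets the user discard an event in the ring buffer
 * and then that event will not be read later.
 *
 * This function only works if it is called before the event has been
 * committed. It will try to free the event from the ring buffer
 * if another event has not been added behind it.
 *
 * If another event has been added behind it, it will set the event
 * up as discarded, and perform the commit.
 *
 * If this function is called, do not call ring_buffer_unlock_commit on
 * the event.
 */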
2676void ring_buffer_discard_commit(struct ring_buffer *buffer,
2677 struct ring_buffer_event *event)
2678{
2679 struct ring_buffer_per_cpu *cpu_buffer;
2680 int cpu;
2681
2682
2683 rb_event_discard(event);
2684
2685 cpu = smp_processor_id();
2686 cpu_buffer = buffer->buffers[cpu];
2687
2688
2689
2690
2691
2692
2693 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
2694
2695 rb_decrement_entry(cpu_buffer, event);
2696 if (rb_try_to_discard(cpu_buffer, event))
2697 goto out;
2698
2699
2700
2701
2702
2703 rb_update_write_stamp(cpu_buffer, event);
2704 out:
2705 rb_end_commit(cpu_buffer);
2706
2707 trace_recursive_unlock();
2708
2709 preempt_enable_notrace();
2710
2711}
2712EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
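
/**
 * ring_buffer_write - write data to the buffer without reserving
 * @buffer: The ring buffer to write to.
 * @length: The length of the data being written (excluding the event header).
 * @data: The data to write to the buffer.
 *
 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
 * one function. If you already have the data to write to the buffer, it
 * may be easier to simply call this function.
 *
 * Note, like ring_buffer_lock_reserve, the length is the length of the
 * data and not the length of the event which would hold the header.
 */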
2727int ring_buffer_write(struct ring_buffer *buffer,
2728 unsigned long length,
2729 void *data)
2730{
2731 struct ring_buffer_per_cpu *cpu_buffer;
2732 struct ring_buffer_event *event;
2733 void *body;
2734 int ret = -EBUSY;
2735 int cpu;
2736
2737 if (ring_buffer_flags != RB_BUFFERS_ON)
2738 return -EBUSY;
2739
2740 preempt_disable_notrace();
2741
2742 if (atomic_read(&buffer->record_disabled))
2743 goto out;
2744
2745 cpu = raw_smp_processor_id();
2746
2747 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2748 goto out;
2749
2750 cpu_buffer = buffer->buffers[cpu];
2751
2752 if (atomic_read(&cpu_buffer->record_disabled))
2753 goto out;
2754
2755 if (length > BUF_MAX_DATA_SIZE)
2756 goto out;
2757
2758 event = rb_reserve_next_event(buffer, cpu_buffer, length);
2759 if (!event)
2760 goto out;
2761
2762 body = rb_event_data(event);
2763
2764 memcpy(body, data, length);
2765
2766 rb_commit(cpu_buffer, event);
2767
2768 ret = 0;
2769 out:
2770 preempt_enable_notrace();
2771
2772 return ret;
2773}
2774EXPORT_SYMBOL_GPL(ring_buffer_write);
2775
2776static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
2777{
2778 struct buffer_page *reader = cpu_buffer->reader_page;
2779 struct buffer_page *head = rb_set_head_page(cpu_buffer);
2780 struct buffer_page *commit = cpu_buffer->commit_page;
2781
2782
2783 if (unlikely(!head))
2784 return 1;
2785
2786 return reader->read == rb_page_commit(reader) &&
2787 (commit == reader ||
2788 (commit == head &&
2789 head->read == rb_page_commit(commit)));
2790}
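
/**
 * ring_buffer_record_disable - stop all writes into the buffer
 * @buffer: The ring buffer to stop writes to.
 *
 * This prevents all writes to the buffer. Any attempt to write
 * to the buffer after this will fail and return NULL.
 *
 * The caller should call synchronize_sched() after this.
 */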
void ring_buffer_record_disable(struct ring_buffer *buffer)
{
	atomic_inc(&buffer->record_disabled);
}
EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
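
/**
 * ring_buffer_record_enable - enable writes to the buffer
 * @buffer: The ring buffer to enable writes
 *
 * Note, multiple disables will need the same number of enables
 * to truly enable the writing (much like preempt_disable).
 */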
void ring_buffer_record_enable(struct ring_buffer *buffer)
{
	atomic_dec(&buffer->record_disabled);
}
EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831void ring_buffer_record_off(struct ring_buffer *buffer)
2832{
2833 unsigned int rd;
2834 unsigned int new_rd;
2835
2836 do {
2837 rd = atomic_read(&buffer->record_disabled);
2838 new_rd = rd | RB_BUFFER_OFF;
2839 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2840}
2841EXPORT_SYMBOL_GPL(ring_buffer_record_off);
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854void ring_buffer_record_on(struct ring_buffer *buffer)
2855{
2856 unsigned int rd;
2857 unsigned int new_rd;
2858
2859 do {
2860 rd = atomic_read(&buffer->record_disabled);
2861 new_rd = rd & ~RB_BUFFER_OFF;
2862 } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
2863}
2864EXPORT_SYMBOL_GPL(ring_buffer_record_on);
2865
2866
2867
2868
2869
2870
2871
2872int ring_buffer_record_is_on(struct ring_buffer *buffer)
2873{
2874 return !atomic_read(&buffer->record_disabled);
2875}
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
2888{
2889 struct ring_buffer_per_cpu *cpu_buffer;
2890
2891 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2892 return;
2893
2894 cpu_buffer = buffer->buffers[cpu];
2895 atomic_inc(&cpu_buffer->record_disabled);
2896}
2897EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2908{
2909 struct ring_buffer_per_cpu *cpu_buffer;
2910
2911 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2912 return;
2913
2914 cpu_buffer = buffer->buffers[cpu];
2915 atomic_dec(&cpu_buffer->record_disabled);
2916}
2917EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2918
2919
2920
2921
2922
2923
2924
2925static inline unsigned long
2926rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2927{
2928 return local_read(&cpu_buffer->entries) -
2929 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2930}
2931
2932
2933
2934
2935
2936
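/**
 * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */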
2937u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
2938{
2939 unsigned long flags;
2940 struct ring_buffer_per_cpu *cpu_buffer;
2941 struct buffer_page *bpage;
2942 u64 ret = 0;
2943
2944 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2945 return 0;
2946
2947 cpu_buffer = buffer->buffers[cpu];
2948 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2949
2950
2951
2952
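	/*
	 * If the tail is on the reader page, the oldest data is
	 * still on the reader page itself.
	 */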
2953 if (cpu_buffer->tail_page == cpu_buffer->reader_page)
2954 bpage = cpu_buffer->reader_page;
2955 else
2956 bpage = rb_set_head_page(cpu_buffer);
2957 if (bpage)
2958 ret = bpage->page->time_stamp;
2959 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2960
2961 return ret;
2962}
2963EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
2964
2965
2966
2967
2968
2969
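/**
 * ring_buffer_bytes_cpu - get the number of bytes written to and not yet
 * read from a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to read from.
 */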
2970unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
2971{
2972 struct ring_buffer_per_cpu *cpu_buffer;
2973 unsigned long ret;
2974
2975 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2976 return 0;
2977
2978 cpu_buffer = buffer->buffers[cpu];
2979 ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
2980
2981 return ret;
2982}
2983EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
2984
2985
2986
2987
2988
2989
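/**
 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the entries from.
 */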
2990unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2991{
2992 struct ring_buffer_per_cpu *cpu_buffer;
2993
2994 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2995 return 0;
2996
2997 cpu_buffer = buffer->buffers[cpu];
2998
2999 return rb_num_of_entries(cpu_buffer);
3000}
3001EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
3002
3003
3004
3005
3006
3007
3008
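/**
 * ring_buffer_overrun_cpu - get the number of events overwritten because
 * the cpu buffer wrapped around
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */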
3009unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
3010{
3011 struct ring_buffer_per_cpu *cpu_buffer;
3012 unsigned long ret;
3013
3014 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3015 return 0;
3016
3017 cpu_buffer = buffer->buffers[cpu];
3018 ret = local_read(&cpu_buffer->overrun);
3019
3020 return ret;
3021}
3022EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
3023
3024
3025
3026
3027
3028
3029
3030
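/**
 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by
 * commits failing because the buffer wrapped around while events were
 * still uncommitted (for example during an interrupt storm)
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of overruns from
 */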
3031unsigned long
3032ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
3033{
3034 struct ring_buffer_per_cpu *cpu_buffer;
3035 unsigned long ret;
3036
3037 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3038 return 0;
3039
3040 cpu_buffer = buffer->buffers[cpu];
3041 ret = local_read(&cpu_buffer->commit_overrun);
3042
3043 return ret;
3044}
3045EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
3046
3047
3048
3049
3050
3051
3052
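/**
 * ring_buffer_dropped_events_cpu - get the number of events dropped because
 * the cpu buffer filled up (when overwrite is disabled)
 * @buffer: The ring buffer
 * @cpu: The per CPU buffer to get the number of dropped events from
 */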
3053unsigned long
3054ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
3055{
3056 struct ring_buffer_per_cpu *cpu_buffer;
3057 unsigned long ret;
3058
3059 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3060 return 0;
3061
3062 cpu_buffer = buffer->buffers[cpu];
3063 ret = local_read(&cpu_buffer->dropped_events);
3064
3065 return ret;
3066}
3067EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
3068
3069
3070
3071
3072
3073
3074
3075
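/**
 * ring_buffer_entries - get the number of entries in a buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of entries in the ring buffer
 * (all CPU entries).
 */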
3076unsigned long ring_buffer_entries(struct ring_buffer *buffer)
3077{
3078 struct ring_buffer_per_cpu *cpu_buffer;
3079 unsigned long entries = 0;
3080 int cpu;
3081
3082
3083 for_each_buffer_cpu(buffer, cpu) {
3084 cpu_buffer = buffer->buffers[cpu];
3085 entries += rb_num_of_entries(cpu_buffer);
3086 }
3087
3088 return entries;
3089}
3090EXPORT_SYMBOL_GPL(ring_buffer_entries);
3091
3092
3093
3094
3095
3096
3097
3098
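/**
 * ring_buffer_overruns - get the number of overruns in the buffer
 * @buffer: The ring buffer
 *
 * Returns the total number of overruns in the ring buffer
 * (all CPU entries).
 */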
3099unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
3100{
3101 struct ring_buffer_per_cpu *cpu_buffer;
3102 unsigned long overruns = 0;
3103 int cpu;
3104
3105
3106 for_each_buffer_cpu(buffer, cpu) {
3107 cpu_buffer = buffer->buffers[cpu];
3108 overruns += local_read(&cpu_buffer->overrun);
3109 }
3110
3111 return overruns;
3112}
3113EXPORT_SYMBOL_GPL(ring_buffer_overruns);
3114
3115static void rb_iter_reset(struct ring_buffer_iter *iter)
3116{
3117 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3118
3119
3120 if (list_empty(&cpu_buffer->reader_page->list)) {
3121 iter->head_page = rb_set_head_page(cpu_buffer);
3122 if (unlikely(!iter->head_page))
3123 return;
3124 iter->head = iter->head_page->read;
3125 } else {
3126 iter->head_page = cpu_buffer->reader_page;
3127 iter->head = cpu_buffer->reader_page->read;
3128 }
3129 if (iter->head)
3130 iter->read_stamp = cpu_buffer->read_stamp;
3131 else
3132 iter->read_stamp = iter->head_page->page->time_stamp;
3133 iter->cache_reader_page = cpu_buffer->reader_page;
3134 iter->cache_read = cpu_buffer->read;
3135}
3136
3137
3138
3139
3140
3141
3142
3143
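/**
 * ring_buffer_iter_reset - reset an iterator
 * @iter: The iterator to reset
 *
 * Resets the iterator, so that it will start from the beginning
 * again.
 */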
3144void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
3145{
3146 struct ring_buffer_per_cpu *cpu_buffer;
3147 unsigned long flags;
3148
3149 if (!iter)
3150 return;
3151
3152 cpu_buffer = iter->cpu_buffer;
3153
3154 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3155 rb_iter_reset(iter);
3156 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3157}
3158EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3159
3160
3161
3162
3163
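/**
 * ring_buffer_iter_empty - check if an iterator has no more to read
 * @iter: The iterator to check
 */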
3164int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3165{
3166 struct ring_buffer_per_cpu *cpu_buffer;
3167
3168 cpu_buffer = iter->cpu_buffer;
3169
3170 return iter->head_page == cpu_buffer->commit_page &&
3171 iter->head == rb_commit_index(cpu_buffer);
3172}
3173EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3174
3175static void
3176rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
3177 struct ring_buffer_event *event)
3178{
3179 u64 delta;
3180
3181 switch (event->type_len) {
3182 case RINGBUF_TYPE_PADDING:
3183 return;
3184
3185 case RINGBUF_TYPE_TIME_EXTEND:
3186 delta = event->array[0];
3187 delta <<= TS_SHIFT;
3188 delta += event->time_delta;
3189 cpu_buffer->read_stamp += delta;
3190 return;
3191
3192 case RINGBUF_TYPE_TIME_STAMP:
3193
3194 return;
3195
3196 case RINGBUF_TYPE_DATA:
3197 cpu_buffer->read_stamp += event->time_delta;
3198 return;
3199
3200 default:
3201 BUG();
3202 }
3203 return;
3204}
3205
3206static void
3207rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
3208 struct ring_buffer_event *event)
3209{
3210 u64 delta;
3211
3212 switch (event->type_len) {
3213 case RINGBUF_TYPE_PADDING:
3214 return;
3215
3216 case RINGBUF_TYPE_TIME_EXTEND:
3217 delta = event->array[0];
3218 delta <<= TS_SHIFT;
3219 delta += event->time_delta;
3220 iter->read_stamp += delta;
3221 return;
3222
3223 case RINGBUF_TYPE_TIME_STAMP:
3224
3225 return;
3226
3227 case RINGBUF_TYPE_DATA:
3228 iter->read_stamp += event->time_delta;
3229 return;
3230
3231 default:
3232 BUG();
3233 }
3234 return;
3235}
3236
3237static struct buffer_page *
3238rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
3239{
3240 struct buffer_page *reader = NULL;
3241 unsigned long overwrite;
3242 unsigned long flags;
3243 int nr_loops = 0;
3244 int ret;
3245
3246 local_irq_save(flags);
3247 arch_spin_lock(&cpu_buffer->lock);
3248
3249 again:
3250
3251
3252
3253
3254
3255
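	/*
	 * This should normally only loop twice. But because the
	 * reader page is first swapped in as an empty page, there is
	 * a case where it takes three passes. There is no good
	 * reason for it to ever loop a fourth time.
	 */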
3256 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
3257 reader = NULL;
3258 goto out;
3259 }
3260
3261 reader = cpu_buffer->reader_page;
3262
3263
3264 if (cpu_buffer->reader_page->read < rb_page_size(reader))
3265 goto out;
3266
3267
3268 if (RB_WARN_ON(cpu_buffer,
3269 cpu_buffer->reader_page->read > rb_page_size(reader)))
3270 goto out;
3271
3272
3273 reader = NULL;
3274 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
3275 goto out;
3276
3277
3278 if (rb_num_of_entries(cpu_buffer) == 0)
3279 goto out;
3280
3281
3282
3283
3284 local_set(&cpu_buffer->reader_page->write, 0);
3285 local_set(&cpu_buffer->reader_page->entries, 0);
3286 local_set(&cpu_buffer->reader_page->page->commit, 0);
3287 cpu_buffer->reader_page->real_end = 0;
3288
3289 spin:
3290
3291
3292
3293 reader = rb_set_head_page(cpu_buffer);
3294 if (!reader)
3295 goto out;
3296 cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next);
3297 cpu_buffer->reader_page->list.prev = reader->list.prev;
3298
3299
3300
3301
3302
3303
3304 cpu_buffer->pages = reader->list.prev;
3305
3306
3307 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
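	/*
	 * Make sure the overrun count is read after the reader page
	 * has been linked in above. The writer's cmpxchg when it
	 * crosses pages provides the matching ordering, so the value
	 * read here is the one to compare against last_overrun.
	 */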
3318 smp_mb();
3319 overwrite = local_read(&(cpu_buffer->overrun));
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
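	/*
	 * Swap the reader page in for the head page. This can race
	 * with a writer that is moving the head at the same time, in
	 * which case the replace fails and we simply try again.
	 */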
3332 ret = rb_head_page_replace(reader, cpu_buffer->reader_page);
3333
3334
3335
3336
3337 if (!ret)
3338 goto spin;
3339
3340
3341
3342
3343
3344
3345 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
3346 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
3347
3348
3349 cpu_buffer->reader_page = reader;
3350 rb_reset_reader_page(cpu_buffer);
3351
3352 if (overwrite != cpu_buffer->last_overrun) {
3353 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
3354 cpu_buffer->last_overrun = overwrite;
3355 }
3356
3357 goto again;
3358
3359 out:
3360 arch_spin_unlock(&cpu_buffer->lock);
3361 local_irq_restore(flags);
3362
3363 return reader;
3364}
3365
3366static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3367{
3368 struct ring_buffer_event *event;
3369 struct buffer_page *reader;
3370 unsigned length;
3371
3372 reader = rb_get_reader_page(cpu_buffer);
3373
3374
3375 if (RB_WARN_ON(cpu_buffer, !reader))
3376 return;
3377
3378 event = rb_reader_event(cpu_buffer);
3379
3380 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
3381 cpu_buffer->read++;
3382
3383 rb_update_read_stamp(cpu_buffer, event);
3384
3385 length = rb_event_length(event);
3386 cpu_buffer->reader_page->read += length;
3387}
3388
3389static void rb_advance_iter(struct ring_buffer_iter *iter)
3390{
3391 struct ring_buffer_per_cpu *cpu_buffer;
3392 struct ring_buffer_event *event;
3393 unsigned length;
3394
3395 cpu_buffer = iter->cpu_buffer;
3396
3397
3398
3399
3400 if (iter->head >= rb_page_size(iter->head_page)) {
3401
3402 if (iter->head_page == cpu_buffer->commit_page)
3403 return;
3404 rb_inc_iter(iter);
3405 return;
3406 }
3407
3408 event = rb_iter_head_event(iter);
3409
3410 length = rb_event_length(event);
3411
3412
3413
3414
3415
3416 if (RB_WARN_ON(cpu_buffer,
3417 (iter->head_page == cpu_buffer->commit_page) &&
3418 (iter->head + length > rb_commit_index(cpu_buffer))))
3419 return;
3420
3421 rb_update_iter_read_stamp(iter, event);
3422
3423 iter->head += length;
3424
3425
3426 if ((iter->head >= rb_page_size(iter->head_page)) &&
3427 (iter->head_page != cpu_buffer->commit_page))
3428 rb_advance_iter(iter);
3429}
3430
3431static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3432{
3433 return cpu_buffer->lost_events;
3434}
3435
3436static struct ring_buffer_event *
3437rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3438 unsigned long *lost_events)
3439{
3440 struct ring_buffer_event *event;
3441 struct buffer_page *reader;
3442 int nr_loops = 0;
3443
3444 again:
3445
3446
3447
3448
3449
3450
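	/*
	 * We repeat when a time extend is encountered.
	 * Since the time extend is always attached to a data event,
	 * we should never loop more than once.
	 * (We never hit the following condition more than twice.)
	 */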
3451 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3452 return NULL;
3453
3454 reader = rb_get_reader_page(cpu_buffer);
3455 if (!reader)
3456 return NULL;
3457
3458 event = rb_reader_event(cpu_buffer);
3459
3460 switch (event->type_len) {
3461 case RINGBUF_TYPE_PADDING:
3462 if (rb_null_event(event))
3463 RB_WARN_ON(cpu_buffer, 1);
3464
3465
3466
3467
3468
3469
3470
3471
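		/*
		 * If the writer is discarding every event it creates,
		 * going back to "again" here could spin forever.
		 * Return the padding instead; the caller will drop
		 * its locks and retry.
		 */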
3472 return event;
3473
3474 case RINGBUF_TYPE_TIME_EXTEND:
3475
3476 rb_advance_reader(cpu_buffer);
3477 goto again;
3478
3479 case RINGBUF_TYPE_TIME_STAMP:
3480
3481 rb_advance_reader(cpu_buffer);
3482 goto again;
3483
3484 case RINGBUF_TYPE_DATA:
3485 if (ts) {
3486 *ts = cpu_buffer->read_stamp + event->time_delta;
3487 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3488 cpu_buffer->cpu, ts);
3489 }
3490 if (lost_events)
3491 *lost_events = rb_lost_events(cpu_buffer);
3492 return event;
3493
3494 default:
3495 BUG();
3496 }
3497
3498 return NULL;
3499}
3500EXPORT_SYMBOL_GPL(ring_buffer_peek);
3501
3502static struct ring_buffer_event *
3503rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3504{
3505 struct ring_buffer *buffer;
3506 struct ring_buffer_per_cpu *cpu_buffer;
3507 struct ring_buffer_event *event;
3508 int nr_loops = 0;
3509
3510 cpu_buffer = iter->cpu_buffer;
3511 buffer = cpu_buffer->buffer;
3512
3513
3514
3515
3516
3517
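	/*
	 * Check if someone performed a consuming read to the buffer.
	 * A consuming read invalidates the iterator, so it must be
	 * reset in that case.
	 */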
3518 if (unlikely(iter->cache_read != cpu_buffer->read ||
3519 iter->cache_reader_page != cpu_buffer->reader_page))
3520 rb_iter_reset(iter);
3521
3522 again:
3523 if (ring_buffer_iter_empty(iter))
3524 return NULL;
3525
3526
3527
3528
3529
3530
3531
3532 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3533 return NULL;
3534
3535 if (rb_per_cpu_empty(cpu_buffer))
3536 return NULL;
3537
3538 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3539 rb_inc_iter(iter);
3540 goto again;
3541 }
3542
3543 event = rb_iter_head_event(iter);
3544
3545 switch (event->type_len) {
3546 case RINGBUF_TYPE_PADDING:
3547 if (rb_null_event(event)) {
3548 rb_inc_iter(iter);
3549 goto again;
3550 }
3551 rb_advance_iter(iter);
3552 return event;
3553
3554 case RINGBUF_TYPE_TIME_EXTEND:
3555
3556 rb_advance_iter(iter);
3557 goto again;
3558
3559 case RINGBUF_TYPE_TIME_STAMP:
3560
3561 rb_advance_iter(iter);
3562 goto again;
3563
3564 case RINGBUF_TYPE_DATA:
3565 if (ts) {
3566 *ts = iter->read_stamp + event->time_delta;
3567 ring_buffer_normalize_time_stamp(buffer,
3568 cpu_buffer->cpu, ts);
3569 }
3570 return event;
3571
3572 default:
3573 BUG();
3574 }
3575
3576 return NULL;
3577}
3578EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
3579
3580static inline int rb_ok_to_lock(void)
3581{
3582
3583
3584
3585
3586
3587
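	/*
	 * If an NMI needs to dump the contents of the ring buffer,
	 * it cannot take the reader lock. Rather than risk a
	 * deadlock, reading from NMI context disables tracing
	 * permanently and skips the lock.
	 */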
3588 if (likely(!in_nmi()))
3589 return 1;
3590
3591 tracing_off_permanent();
3592 return 0;
3593}
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
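/**
 * ring_buffer_peek - peek at the next event to be read
 * @buffer: The ring buffer to read
 * @cpu: The cpu to peek at
 * @ts: The timestamp counter of this event.
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * This will return the event that will be read next, but does
 * not consume the data.
 */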
3605struct ring_buffer_event *
3606ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3607 unsigned long *lost_events)
3608{
3609 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3610 struct ring_buffer_event *event;
3611 unsigned long flags;
3612 int dolock;
3613
3614 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3615 return NULL;
3616
3617 dolock = rb_ok_to_lock();
3618 again:
3619 local_irq_save(flags);
3620 if (dolock)
3621 raw_spin_lock(&cpu_buffer->reader_lock);
3622 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3623 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3624 rb_advance_reader(cpu_buffer);
3625 if (dolock)
3626 raw_spin_unlock(&cpu_buffer->reader_lock);
3627 local_irq_restore(flags);
3628
3629 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3630 goto again;
3631
3632 return event;
3633}
3634
3635
3636
3637
3638
3639
3640
3641
3642
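/**
 * ring_buffer_iter_peek - peek at the next event to be read
 * @iter: The ring buffer iterator
 * @ts: The timestamp counter of this event.
 *
 * This will return the event that will be read next, but does
 * not increment the iterator.
 */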
3643struct ring_buffer_event *
3644ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3645{
3646 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3647 struct ring_buffer_event *event;
3648 unsigned long flags;
3649
3650 again:
3651 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3652 event = rb_iter_peek(iter, ts);
3653 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3654
3655 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3656 goto again;
3657
3658 return event;
3659}
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
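/**
 * ring_buffer_consume - return an event and consume it
 * @buffer: The ring buffer to get the next event from
 * @cpu: the cpu to read the buffer from
 * @ts: a variable to store the timestamp (may be NULL)
 * @lost_events: a variable to store if events were lost (may be NULL)
 *
 * Returns the next event in the ring buffer, and that event is consumed.
 * Meaning, that sequential reads will keep returning a different event,
 * and eventually empty the ring buffer if the producer is slower.
 */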
3672struct ring_buffer_event *
3673ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3674 unsigned long *lost_events)
3675{
3676 struct ring_buffer_per_cpu *cpu_buffer;
3677 struct ring_buffer_event *event = NULL;
3678 unsigned long flags;
3679 int dolock;
3680
3681 dolock = rb_ok_to_lock();
3682
3683 again:
3684
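	/* might be called in atomic context */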
3685 preempt_disable();
3686
3687 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3688 goto out;
3689
3690 cpu_buffer = buffer->buffers[cpu];
3691 local_irq_save(flags);
3692 if (dolock)
3693 raw_spin_lock(&cpu_buffer->reader_lock);
3694
3695 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3696 if (event) {
3697 cpu_buffer->lost_events = 0;
3698 rb_advance_reader(cpu_buffer);
3699 }
3700
3701 if (dolock)
3702 raw_spin_unlock(&cpu_buffer->reader_lock);
3703 local_irq_restore(flags);
3704
3705 out:
3706 preempt_enable();
3707
3708 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3709 goto again;
3710
3711 return event;
3712}
3713EXPORT_SYMBOL_GPL(ring_buffer_consume);
3714
3715
3734
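/**
 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
 * @buffer: The ring buffer to read from
 * @cpu: The cpu buffer to iterate over
 *
 * This performs the initial preparations necessary to iterate
 * through the buffer. Memory is allocated, buffer recording
 * is disabled, and the iterator pointer is returned to the caller.
 *
 * Disabling buffer recording prevents the reading from being
 * corrupted. This is not a consuming read, so a producer is not
 * expected.
 *
 * After a sequence of ring_buffer_read_prepare calls, the user is
 * expected to make at least one call to ring_buffer_read_prepare_sync.
 * Afterwards, ring_buffer_read_start is invoked to get things going
 * for real.
 *
 * This overall must be paired with ring_buffer_read_finish.
 */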
3735struct ring_buffer_iter *
3736ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3737{
3738 struct ring_buffer_per_cpu *cpu_buffer;
3739 struct ring_buffer_iter *iter;
3740
3741 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3742 return NULL;
3743
3744 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
3745 if (!iter)
3746 return NULL;
3747
3748 cpu_buffer = buffer->buffers[cpu];
3749
3750 iter->cpu_buffer = cpu_buffer;
3751
3752 atomic_inc(&buffer->resize_disabled);
3753 atomic_inc(&cpu_buffer->record_disabled);
3754
3755 return iter;
3756}
3757EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3758
3759
3760
3761
3762
3763
3764
3765
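/**
 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
 *
 * All previously invoked ring_buffer_read_prepare calls to prepare
 * iterators will be synchronized. Afterwards, ring_buffer_read_start
 * calls on those iterators are allowed.
 */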
3766void
3767ring_buffer_read_prepare_sync(void)
3768{
3769 synchronize_sched();
3770}
3771EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
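/**
 * ring_buffer_read_start - start a non consuming read of the buffer
 * @iter: The iterator returned by ring_buffer_read_prepare
 *
 * This finalizes the startup of an iteration through the buffer.
 * The iterator comes from a call to ring_buffer_read_prepare and
 * an intervening ring_buffer_read_prepare_sync must have been
 * performed.
 *
 * Must be paired with ring_buffer_read_finish.
 */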
3784void
3785ring_buffer_read_start(struct ring_buffer_iter *iter)
3786{
3787 struct ring_buffer_per_cpu *cpu_buffer;
3788 unsigned long flags;
3789
3790 if (!iter)
3791 return;
3792
3793 cpu_buffer = iter->cpu_buffer;
3794
3795 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3796 arch_spin_lock(&cpu_buffer->lock);
3797 rb_iter_reset(iter);
3798 arch_spin_unlock(&cpu_buffer->lock);
3799 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3800}
3801EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3802
3803
3804
3805
3806
3807
3808
3809
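/**
 * ring_buffer_read_finish - finish reading the iterator of the buffer
 * @iter: The iterator retrieved by ring_buffer_read_prepare
 *
 * This re-enables recording to the buffer, re-enables resizing,
 * and frees the iterator.
 */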
3810void
3811ring_buffer_read_finish(struct ring_buffer_iter *iter)
3812{
3813 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3814 unsigned long flags;
3815
3816
3817
3818
3819
3820
3821
3822 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3823 rb_check_pages(cpu_buffer);
3824 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3825
3826 atomic_dec(&cpu_buffer->record_disabled);
3827 atomic_dec(&cpu_buffer->buffer->resize_disabled);
3828 kfree(iter);
3829}
3830EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
3831
3832
3833
3834
3835
3836
3837
3838
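/**
 * ring_buffer_read - read the next item in the ring buffer by the iterator
 * @iter: The ring buffer iterator
 * @ts: The time stamp of the event read.
 *
 * This reads the next event in the ring buffer and increments the iterator.
 */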
3839struct ring_buffer_event *
3840ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
3841{
3842 struct ring_buffer_event *event;
3843 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
3844 unsigned long flags;
3845
3846 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3847 again:
3848 event = rb_iter_peek(iter, ts);
3849 if (!event)
3850 goto out;
3851
3852 if (event->type_len == RINGBUF_TYPE_PADDING)
3853 goto again;
3854
3855 rb_advance_iter(iter);
3856 out:
3857 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3858
3859 return event;
3860}
3861EXPORT_SYMBOL_GPL(ring_buffer_read);
3862
3863
3864
3865
3866
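/**
 * ring_buffer_size - return the size of the ring buffer (in bytes)
 * @buffer: The ring buffer.
 * @cpu: The CPU to get the ring buffer size from.
 */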
3867unsigned long ring_buffer_size(struct ring_buffer *buffer, int cpu)
3868{
3869
3870
3871
3872
3873
3874
3875 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3876 return 0;
3877
3878 return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
3879}
3880EXPORT_SYMBOL_GPL(ring_buffer_size);
3881
3882static void
3883rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3884{
3885 rb_head_page_deactivate(cpu_buffer);
3886
3887 cpu_buffer->head_page
3888 = list_entry(cpu_buffer->pages, struct buffer_page, list);
3889 local_set(&cpu_buffer->head_page->write, 0);
3890 local_set(&cpu_buffer->head_page->entries, 0);
3891 local_set(&cpu_buffer->head_page->page->commit, 0);
3892
3893 cpu_buffer->head_page->read = 0;
3894
3895 cpu_buffer->tail_page = cpu_buffer->head_page;
3896 cpu_buffer->commit_page = cpu_buffer->head_page;
3897
3898 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
3899 INIT_LIST_HEAD(&cpu_buffer->new_pages);
3900 local_set(&cpu_buffer->reader_page->write, 0);
3901 local_set(&cpu_buffer->reader_page->entries, 0);
3902 local_set(&cpu_buffer->reader_page->page->commit, 0);
3903 cpu_buffer->reader_page->read = 0;
3904
3905 local_set(&cpu_buffer->entries_bytes, 0);
3906 local_set(&cpu_buffer->overrun, 0);
3907 local_set(&cpu_buffer->commit_overrun, 0);
3908 local_set(&cpu_buffer->dropped_events, 0);
3909 local_set(&cpu_buffer->entries, 0);
3910 local_set(&cpu_buffer->committing, 0);
3911 local_set(&cpu_buffer->commits, 0);
3912 cpu_buffer->read = 0;
3913 cpu_buffer->read_bytes = 0;
3914
3915 cpu_buffer->write_stamp = 0;
3916 cpu_buffer->read_stamp = 0;
3917
3918 cpu_buffer->lost_events = 0;
3919 cpu_buffer->last_overrun = 0;
3920
3921 rb_head_page_activate(cpu_buffer);
3922}
3923
3924
3925
3926
3927
3928
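/**
 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
 * @buffer: The ring buffer to reset a per cpu buffer of
 * @cpu: The CPU buffer to be reset
 */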
3929void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
3930{
3931 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3932 unsigned long flags;
3933
3934 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3935 return;
3936
3937 atomic_inc(&buffer->resize_disabled);
3938 atomic_inc(&cpu_buffer->record_disabled);
3939
3940
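	/* Make sure all in-flight commits have finished */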
3941 synchronize_sched();
3942
3943 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3944
3945 if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
3946 goto out;
3947
3948 arch_spin_lock(&cpu_buffer->lock);
3949
3950 rb_reset_cpu(cpu_buffer);
3951
3952 arch_spin_unlock(&cpu_buffer->lock);
3953
3954 out:
3955 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3956
3957 atomic_dec(&cpu_buffer->record_disabled);
3958 atomic_dec(&buffer->resize_disabled);
3959}
3960EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
3961
3962
3963
3964
3965
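/**
 * ring_buffer_reset - reset a ring buffer
 * @buffer: The ring buffer to reset all cpu buffers
 */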
3966void ring_buffer_reset(struct ring_buffer *buffer)
3967{
3968 int cpu;
3969
3970 for_each_buffer_cpu(buffer, cpu)
3971 ring_buffer_reset_cpu(buffer, cpu);
3972}
3973EXPORT_SYMBOL_GPL(ring_buffer_reset);
3974
3975
3976
3977
3978
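/**
 * ring_buffer_empty - is the ring buffer empty?
 * @buffer: The ring buffer to test
 */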
3979int ring_buffer_empty(struct ring_buffer *buffer)
3980{
3981 struct ring_buffer_per_cpu *cpu_buffer;
3982 unsigned long flags;
3983 int dolock;
3984 int cpu;
3985 int ret;
3986
3987 dolock = rb_ok_to_lock();
3988
3989
3990 for_each_buffer_cpu(buffer, cpu) {
3991 cpu_buffer = buffer->buffers[cpu];
3992 local_irq_save(flags);
3993 if (dolock)
3994 raw_spin_lock(&cpu_buffer->reader_lock);
3995 ret = rb_per_cpu_empty(cpu_buffer);
3996 if (dolock)
3997 raw_spin_unlock(&cpu_buffer->reader_lock);
3998 local_irq_restore(flags);
3999
4000 if (!ret)
4001 return 0;
4002 }
4003
4004 return 1;
4005}
4006EXPORT_SYMBOL_GPL(ring_buffer_empty);
4007
4008
4009
4010
4011
4012
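/**
 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
 * @buffer: The ring buffer
 * @cpu: The CPU buffer to test
 */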
4013int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
4014{
4015 struct ring_buffer_per_cpu *cpu_buffer;
4016 unsigned long flags;
4017 int dolock;
4018 int ret;
4019
4020 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4021 return 1;
4022
4023 dolock = rb_ok_to_lock();
4024
4025 cpu_buffer = buffer->buffers[cpu];
4026 local_irq_save(flags);
4027 if (dolock)
4028 raw_spin_lock(&cpu_buffer->reader_lock);
4029 ret = rb_per_cpu_empty(cpu_buffer);
4030 if (dolock)
4031 raw_spin_unlock(&cpu_buffer->reader_lock);
4032 local_irq_restore(flags);
4033
4034 return ret;
4035}
4036EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
4037
4038#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
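/**
 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
 * @buffer_a: One buffer to swap with
 * @buffer_b: The other buffer to swap with
 * @cpu: the CPU of the buffers to swap
 *
 * This function is useful for tracers that want to take a "snapshot"
 * of a CPU buffer and have another spare buffer lying around.
 * It is expected that the tracer handles the cpu buffer not being
 * used at the moment.
 */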
4049int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
4050 struct ring_buffer *buffer_b, int cpu)
4051{
4052 struct ring_buffer_per_cpu *cpu_buffer_a;
4053 struct ring_buffer_per_cpu *cpu_buffer_b;
4054 int ret = -EINVAL;
4055
4056 if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
4057 !cpumask_test_cpu(cpu, buffer_b->cpumask))
4058 goto out;
4059
4060 cpu_buffer_a = buffer_a->buffers[cpu];
4061 cpu_buffer_b = buffer_b->buffers[cpu];
4062
4063
4064 if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
4065 goto out;
4066
4067 ret = -EAGAIN;
4068
4069 if (ring_buffer_flags != RB_BUFFERS_ON)
4070 goto out;
4071
4072 if (atomic_read(&buffer_a->record_disabled))
4073 goto out;
4074
4075 if (atomic_read(&buffer_b->record_disabled))
4076 goto out;
4077
4078 if (atomic_read(&cpu_buffer_a->record_disabled))
4079 goto out;
4080
4081 if (atomic_read(&cpu_buffer_b->record_disabled))
4082 goto out;
4083
4084
4085
4086
4087
4088
4089
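	/*
	 * We can't use synchronize_sched() here because this
	 * function can be called in atomic context. Normally this
	 * will be called from the same CPU as @cpu; if not, it is
	 * up to the caller to provide the protection.
	 */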
4090 atomic_inc(&cpu_buffer_a->record_disabled);
4091 atomic_inc(&cpu_buffer_b->record_disabled);
4092
4093 ret = -EBUSY;
4094 if (local_read(&cpu_buffer_a->committing))
4095 goto out_dec;
4096 if (local_read(&cpu_buffer_b->committing))
4097 goto out_dec;
4098
4099 buffer_a->buffers[cpu] = cpu_buffer_b;
4100 buffer_b->buffers[cpu] = cpu_buffer_a;
4101
4102 cpu_buffer_b->buffer = buffer_a;
4103 cpu_buffer_a->buffer = buffer_b;
4104
4105 ret = 0;
4106
4107out_dec:
4108 atomic_dec(&cpu_buffer_a->record_disabled);
4109 atomic_dec(&cpu_buffer_b->record_disabled);
4110out:
4111 return ret;
4112}
4113EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
4114#endif
4115
4116
4130
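/**
 * ring_buffer_alloc_read_page - allocate a page to read from the buffer
 * @buffer: the buffer to allocate for.
 * @cpu: the cpu buffer to allocate a page for
 *
 * This function is used in conjunction with ring_buffer_read_page.
 * The caller allocates a page with this function and then passes the
 * result into ring_buffer_read_page, which may swap the allocated
 * page with the read page of the buffer.
 *
 * Returns:
 *  The page allocated, or NULL on error.
 */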
4131void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
4132{
4133 struct buffer_data_page *bpage;
4134 struct page *page;
4135
4136 page = alloc_pages_node(cpu_to_node(cpu),
4137 GFP_KERNEL | __GFP_NORETRY, 0);
4138 if (!page)
4139 return NULL;
4140
4141 bpage = page_address(page);
4142
4143 rb_init_page(bpage);
4144
4145 return bpage;
4146}
4147EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
4148
4149
4150
4151
4152
4153
4154
4155
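/**
 * ring_buffer_free_read_page - free an allocated read page
 * @buffer: the buffer the page was allocated for
 * @data: the page to free
 *
 * Free a page allocated from ring_buffer_alloc_read_page.
 */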
4156void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
4157{
4158 free_page((unsigned long)data);
4159}
4160EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
4161
4162
4194
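/**
 * ring_buffer_read_page - extract a page from the ring buffer
 * @buffer: buffer to extract from
 * @data_page: the page to use, allocated from ring_buffer_alloc_read_page
 * @len: amount to extract
 * @cpu: the cpu of the buffer to extract
 * @full: should the extraction only happen when the page is full.
 *
 * This function will pull out a page from the ring buffer and consume it.
 * @data_page must be the address of the variable that was returned
 * from ring_buffer_alloc_read_page, because the page might be swapped
 * with a page in the ring buffer.
 *
 * for example (process_page() stands in for whatever the caller does
 * with the extracted data):
 *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
 *	if (!rpage)
 *		return error;
 *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
 *	if (ret >= 0)
 *		process_page(rpage, ret);
 *
 * When @full is set, the extraction only happens when the writer
 * is off the reader page.
 *
 * Note: it is up to the calling functions to handle sleeps and wakeups.
 *  The ring buffer can be used anywhere in the kernel and cannot
 *  rely on any other mechanisms to handle sleeps.
 *
 * Returns:
 *  >=0 if data has been transferred; returns the offset of consumed data.
 *  <0 if no data has been transferred.
 */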
4195int ring_buffer_read_page(struct ring_buffer *buffer,
4196 void **data_page, size_t len, int cpu, int full)
4197{
4198 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4199 struct ring_buffer_event *event;
4200 struct buffer_data_page *bpage;
4201 struct buffer_page *reader;
4202 unsigned long missed_events;
4203 unsigned long flags;
4204 unsigned int commit;
4205 unsigned int read;
4206 u64 save_timestamp;
4207 int ret = -1;
4208
4209 if (!cpumask_test_cpu(cpu, buffer->cpumask))
4210 goto out;
4211
4212
4213
4214
4215
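	/*
	 * If len is not big enough to hold the page header, then
	 * we can not copy anything.
	 */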
4216 if (len <= BUF_PAGE_HDR_SIZE)
4217 goto out;
4218
4219 len -= BUF_PAGE_HDR_SIZE;
4220
4221 if (!data_page)
4222 goto out;
4223
4224 bpage = *data_page;
4225 if (!bpage)
4226 goto out;
4227
4228 raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
4229
4230 reader = rb_get_reader_page(cpu_buffer);
4231 if (!reader)
4232 goto out_unlock;
4233
4234 event = rb_reader_event(cpu_buffer);
4235
4236 read = reader->read;
4237 commit = rb_page_commit(reader);
4238
4239
4240 missed_events = cpu_buffer->lost_events;
4241
4242
4243
4244
4245
4246
4247
4248
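	/*
	 * If this page has been partially read, if len is not big
	 * enough to hold the rest of the page, or a writer is still
	 * on the page, then the data must be copied out event by
	 * event. Otherwise, the whole page can simply be swapped
	 * with the one passed in.
	 */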
4249 if (read || (len < (commit - read)) ||
4250 cpu_buffer->reader_page == cpu_buffer->commit_page) {
4251 struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
4252 unsigned int rpos = read;
4253 unsigned int pos = 0;
4254 unsigned int size;
4255
4256 if (full)
4257 goto out_unlock;
4258
4259 if (len > (commit - read))
4260 len = (commit - read);
4261
4262
4263 size = rb_event_ts_length(event);
4264
4265 if (len < size)
4266 goto out_unlock;
4267
4268
4269 save_timestamp = cpu_buffer->read_stamp;
4270
4271
4272 do {
4273
4274
4275
4276
4277
4278
4279 size = rb_event_length(event);
4280 memcpy(bpage->data + pos, rpage->data + rpos, size);
4281
4282 len -= size;
4283
4284 rb_advance_reader(cpu_buffer);
4285 rpos = reader->read;
4286 pos += size;
4287
4288 if (rpos >= commit)
4289 break;
4290
4291 event = rb_reader_event(cpu_buffer);
4292
4293 size = rb_event_ts_length(event);
4294 } while (len >= size);
4295
4296
4297 local_set(&bpage->commit, pos);
4298 bpage->time_stamp = save_timestamp;
4299
4300
4301 read = 0;
4302 } else {
4303
4304 cpu_buffer->read += rb_page_entries(reader);
4305 cpu_buffer->read_bytes += BUF_PAGE_SIZE;
4306
4307
4308 rb_init_page(bpage);
4309 bpage = reader->page;
4310 reader->page = *data_page;
4311 local_set(&reader->write, 0);
4312 local_set(&reader->entries, 0);
4313 reader->read = 0;
4314 *data_page = bpage;
4315
4316
4317
4318
4319
4320
4321 if (reader->real_end)
4322 local_set(&bpage->commit, reader->real_end);
4323 }
4324 ret = read;
4325
4326 cpu_buffer->lost_events = 0;
4327
4328 commit = local_read(&bpage->commit);
4329
4330
4331
4332 if (missed_events) {
4333
4334
4335
4336 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
4337 memcpy(&bpage->data[commit], &missed_events,
4338 sizeof(missed_events));
4339 local_add(RB_MISSED_STORED, &bpage->commit);
4340 commit += sizeof(missed_events);
4341 }
4342 local_add(RB_MISSED_EVENTS, &bpage->commit);
4343 }
4344
4345
4346
4347
4348 if (commit < BUF_PAGE_SIZE)
4349 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
4350
4351 out_unlock:
4352 raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
4353
4354 out:
4355 return ret;
4356}
4357EXPORT_SYMBOL_GPL(ring_buffer_read_page);
4358
4359#ifdef CONFIG_HOTPLUG_CPU
4360static int rb_cpu_notify(struct notifier_block *self,
4361 unsigned long action, void *hcpu)
4362{
4363 struct ring_buffer *buffer =
4364 container_of(self, struct ring_buffer, cpu_notify);
4365 long cpu = (long)hcpu;
4366 int cpu_i, nr_pages_same;
4367 unsigned int nr_pages;
4368
4369 switch (action) {
4370 case CPU_UP_PREPARE:
4371 case CPU_UP_PREPARE_FROZEN:
4372 if (cpumask_test_cpu(cpu, buffer->cpumask))
4373 return NOTIFY_OK;
4374
4375 nr_pages = 0;
4376 nr_pages_same = 1;
4377
4378 for_each_buffer_cpu(buffer, cpu_i) {
4379
4380 if (nr_pages == 0)
4381 nr_pages = buffer->buffers[cpu_i]->nr_pages;
4382 if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
4383 nr_pages_same = 0;
4384 break;
4385 }
4386 }
4387
4388 if (!nr_pages_same)
4389 nr_pages = 2;
4390 buffer->buffers[cpu] =
4391 rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
4392 if (!buffer->buffers[cpu]) {
4393 WARN(1, "failed to allocate ring buffer on CPU %ld\n",
4394 cpu);
4395 return NOTIFY_OK;
4396 }
4397 smp_wmb();
4398 cpumask_set_cpu(cpu, buffer->cpumask);
4399 break;
4400 case CPU_DOWN_PREPARE:
4401 case CPU_DOWN_PREPARE_FROZEN:
4402
4403
4404
4405
4406
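		/*
		 * Do nothing. If the buffer were freed here, the
		 * user would lose any trace that was still in it.
		 */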
4407 break;
4408 default:
4409 break;
4410 }
4411 return NOTIFY_OK;
4412}
4413#endif
4414