1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and is set back to zero only if the
112  * initialization of the tracer is successful; that is the only
113  * place that clears it.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
123  * is set, then ftrace_dump() is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193
194 static int __init set_cmdline_ftrace(char *str)
195 {
196         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197         default_bootup_tracer = bootup_tracer_buf;
198         /* We are using ftrace early, expand it */
199         trace_set_ring_buffer_expanded(NULL);
200         return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206         if (*str++ != '=' || !*str || !strcmp("1", str)) {
207                 ftrace_dump_on_oops = DUMP_ALL;
208                 return 1;
209         }
210
211         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212                 ftrace_dump_on_oops = DUMP_ORIG;
213                 return 1;
214         }
215
216         return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
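
/*
 * Illustrative examples (not part of this file) of the boot-parameter
 * forms the parser above accepts:
 *
 *	ftrace_dump_on_oops			dump the buffers of all CPUs
 *	ftrace_dump_on_oops=1			same as above
 *	ftrace_dump_on_oops=orig_cpu		dump only the CPU that oopsed
 *	ftrace_dump_on_oops=2			same as orig_cpu
 */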
219
220 static int __init stop_trace_on_warning(char *str)
221 {
222         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223                 __disable_trace_on_warning = 1;
224         return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227
228 static int __init boot_alloc_snapshot(char *str)
229 {
230         char *slot = boot_snapshot_info + boot_snapshot_index;
231         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232         int ret;
233
234         if (str[0] == '=') {
235                 str++;
236                 if (strlen(str) >= left)
237                         return -1;
238
239                 ret = snprintf(slot, left, "%s\t", str);
240                 boot_snapshot_index += ret;
241         } else {
242                 allocate_snapshot = true;
243                 /* We also need the main ring buffer expanded */
244                 trace_set_ring_buffer_expanded(NULL);
245         }
246         return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249
250
251 static int __init boot_snapshot(char *str)
252 {
253         snapshot_at_boot = true;
254         boot_alloc_snapshot(str);
255         return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258
259
260 static int __init boot_instance(char *str)
261 {
262         char *slot = boot_instance_info + boot_instance_index;
263         int left = sizeof(boot_instance_info) - boot_instance_index;
264         int ret;
265
266         if (strlen(str) >= left)
267                 return -1;
268
269         ret = snprintf(slot, left, "%s\t", str);
270         boot_instance_index += ret;
271
272         return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275
276
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278
279 static int __init set_trace_boot_options(char *str)
280 {
281         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282         return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288
289 static int __init set_trace_boot_clock(char *str)
290 {
291         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292         trace_boot_clock = trace_boot_clock_buf;
293         return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296
297 static int __init set_tracepoint_printk(char *str)
298 {
299         /* Ignore the "tp_printk_stop_on_boot" param */
300         if (*str == '_')
301                 return 0;
302
303         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304                 tracepoint_printk = 1;
305         return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311         tracepoint_printk_stop_on_boot = true;
312         return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
315
316 unsigned long long ns2usecs(u64 nsec)
317 {
318         nsec += 500;
319         do_div(nsec, 1000);
320         return nsec;
321 }
322
323 static void
324 trace_process_export(struct trace_export *export,
325                struct ring_buffer_event *event, int flag)
326 {
327         struct trace_entry *entry;
328         unsigned int size = 0;
329
330         if (export->flags & flag) {
331                 entry = ring_buffer_event_data(event);
332                 size = ring_buffer_event_length(event);
333                 export->write(export, entry, size);
334         }
335 }
336
337 static DEFINE_MUTEX(ftrace_export_lock);
338
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347         if (export->flags & TRACE_EXPORT_FUNCTION)
348                 static_branch_inc(&trace_function_exports_enabled);
349
350         if (export->flags & TRACE_EXPORT_EVENT)
351                 static_branch_inc(&trace_event_exports_enabled);
352
353         if (export->flags & TRACE_EXPORT_MARKER)
354                 static_branch_inc(&trace_marker_exports_enabled);
355 }
356
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359         if (export->flags & TRACE_EXPORT_FUNCTION)
360                 static_branch_dec(&trace_function_exports_enabled);
361
362         if (export->flags & TRACE_EXPORT_EVENT)
363                 static_branch_dec(&trace_event_exports_enabled);
364
365         if (export->flags & TRACE_EXPORT_MARKER)
366                 static_branch_dec(&trace_marker_exports_enabled);
367 }
368
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371         struct trace_export *export;
372
373         preempt_disable_notrace();
374
375         export = rcu_dereference_raw_check(ftrace_exports_list);
376         while (export) {
377                 trace_process_export(export, event, flag);
378                 export = rcu_dereference_raw_check(export->next);
379         }
380
381         preempt_enable_notrace();
382 }
383
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387         rcu_assign_pointer(export->next, *list);
388         /*
389          * We are adding export to the list, but another
390          * CPU might be walking that list. We need to make sure
391          * the export->next pointer is valid before another CPU sees
392          * the export pointer included in the list.
393          */
394         rcu_assign_pointer(*list, export);
395 }
396
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400         struct trace_export **p;
401
402         for (p = list; *p != NULL; p = &(*p)->next)
403                 if (*p == export)
404                         break;
405
406         if (*p != export)
407                 return -1;
408
409         rcu_assign_pointer(*p, (*p)->next);
410
411         return 0;
412 }
413
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417         ftrace_exports_enable(export);
418
419         add_trace_export(list, export);
420 }
421
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425         int ret;
426
427         ret = rm_trace_export(list, export);
428         ftrace_exports_disable(export);
429
430         return ret;
431 }
432
433 int register_ftrace_export(struct trace_export *export)
434 {
435         if (WARN_ON_ONCE(!export->write))
436                 return -1;
437
438         mutex_lock(&ftrace_export_lock);
439
440         add_ftrace_export(&ftrace_exports_list, export);
441
442         mutex_unlock(&ftrace_export_lock);
443
444         return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450         int ret;
451
452         mutex_lock(&ftrace_export_lock);
453
454         ret = rm_ftrace_export(&ftrace_exports_list, export);
455
456         mutex_unlock(&ftrace_export_lock);
457
458         return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
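
/*
 * Illustrative sketch (not part of this file) of how a client might use the
 * export API above. The names "my_export" and "my_export_write" are
 * hypothetical, and the write() prototype is assumed to match struct
 * trace_export in <linux/trace.h>:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		...forward the raw trace entry to some backend...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */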
461
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS                                             \
464         (FUNCTION_DEFAULT_FLAGS |                                       \
465          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
466          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
467          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
468          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
469          TRACE_ITER_HASH_PTR)
470
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
473                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484         .trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489         if (!tr)
490                 tr = &global_trace;
491         tr->ring_buffer_expanded = true;
492 }
493
494 LIST_HEAD(ftrace_trace_arrays);
495
496 int trace_array_get(struct trace_array *this_tr)
497 {
498         struct trace_array *tr;
499         int ret = -ENODEV;
500
501         mutex_lock(&trace_types_lock);
502         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503                 if (tr == this_tr) {
504                         tr->ref++;
505                         ret = 0;
506                         break;
507                 }
508         }
509         mutex_unlock(&trace_types_lock);
510
511         return ret;
512 }
513
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516         WARN_ON(!this_tr->ref);
517         this_tr->ref--;
518 }
519
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr: pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can later
526  * be destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531         if (!this_tr)
532                 return;
533
534         mutex_lock(&trace_types_lock);
535         __trace_array_put(this_tr);
536         mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
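
/*
 * Illustrative sketch (not part of this file) of the get/put pairing
 * described above; "my_instance" is a hypothetical instance name and
 * trace_array_get_by_name() is assumed to return a referenced instance:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		...use tr...
 *		trace_array_put(tr);
 *	}
 */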
539
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542         int ret;
543
544         ret = security_locked_down(LOCKDOWN_TRACEFS);
545         if (ret)
546                 return ret;
547
548         if (tracing_disabled)
549                 return -ENODEV;
550
551         if (tr && trace_array_get(tr) < 0)
552                 return -ENODEV;
553
554         return 0;
555 }
556
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558                               struct trace_buffer *buffer,
559                               struct ring_buffer_event *event)
560 {
561         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562             !filter_match_preds(call->filter, rec)) {
563                 __trace_event_discard_commit(buffer, event);
564                 return 1;
565         }
566
567         return 0;
568 }
569
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580         return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595                        struct trace_pid_list *filtered_no_pids,
596                        struct task_struct *task)
597 {
598         /*
599          * If filtered_no_pids is not empty, and the task's pid is listed
600          * in filtered_no_pids, then return true.
601          * Otherwise, if filtered_pids is empty, that means we can
602          * trace all tasks. If it has content, then only trace pids
603          * within filtered_pids.
604          */
605
606         return (filtered_pids &&
607                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608                 (filtered_no_pids &&
609                  trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
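
/*
 * For example (illustrative): with filtered_pids = { 100 } and
 * filtered_no_pids == NULL, only a task with pid 100 is traced; with
 * filtered_pids == NULL and filtered_no_pids = { 100 }, every task
 * except pid 100 is traced.
 */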
611
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * When adding a task with @self defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork, and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which happens on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625                                   struct task_struct *self,
626                                   struct task_struct *task)
627 {
628         if (!pid_list)
629                 return;
630
631         /* For forks, we only add if the forking task is listed */
632         if (self) {
633                 if (!trace_find_filtered_pid(pid_list, self->pid))
634                         return;
635         }
636
637         /* "self" is set for forks, and NULL for exits */
638         if (self)
639                 trace_pid_list_set(pid_list, task->pid);
640         else
641                 trace_pid_list_clear(pid_list, task->pid);
642 }
643
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (+1 of the actual pid so zero can be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658         long pid = (unsigned long)v;
659         unsigned int next;
660
661         (*pos)++;
662
663         /* pid already is +1 of the actual previous bit */
664         if (trace_pid_list_next(pid_list, pid, &next) < 0)
665                 return NULL;
666
667         pid = next;
668
669         /* Return pid + 1 to allow zero to be represented */
670         return (void *)(pid + 1);
671 }
672
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686         unsigned long pid;
687         unsigned int first;
688         loff_t l = 0;
689
690         if (trace_pid_list_first(pid_list, &first) < 0)
691                 return NULL;
692
693         pid = first;
694
695         /* Return pid + 1 so that zero can be the exit value */
696         for (pid++; pid && l < *pos;
697              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698                 ;
699         return (void *)pid;
700 }
701
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712         unsigned long pid = (unsigned long)v - 1;
713
714         seq_printf(m, "%lu\n", pid);
715         return 0;
716 }
717
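/*
 * Illustrative sketch (not part of this file) of how the three helpers
 * above are typically wired into a seq_file. The "my_pid_list" variable
 * and the my_* wrappers are hypothetical; real users resolve their pid
 * list under the appropriate locking before handing it to the helpers.
 *
 *	static struct trace_pid_list *my_pid_list;
 *
 *	static void *my_pid_seq_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pid_seq_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,
 *		.next	= my_pid_seq_next,
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */
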
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE            127
720
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722                     struct trace_pid_list **new_pid_list,
723                     const char __user *ubuf, size_t cnt)
724 {
725         struct trace_pid_list *pid_list;
726         struct trace_parser parser;
727         unsigned long val;
728         int nr_pids = 0;
729         ssize_t read = 0;
730         ssize_t ret;
731         loff_t pos;
732         pid_t pid;
733
734         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735                 return -ENOMEM;
736
737         /*
738          * Always create a new array. The write is an all-or-nothing
739          * operation: a new array is built whenever the user adds new
740          * pids, and if the operation fails, the current list is
741          * not modified.
742          */
743         pid_list = trace_pid_list_alloc();
744         if (!pid_list) {
745                 trace_parser_put(&parser);
746                 return -ENOMEM;
747         }
748
749         if (filtered_pids) {
750                 /* copy the current bits to the new max */
751                 ret = trace_pid_list_first(filtered_pids, &pid);
752                 while (!ret) {
753                         trace_pid_list_set(pid_list, pid);
754                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755                         nr_pids++;
756                 }
757         }
758
759         ret = 0;
760         while (cnt > 0) {
761
762                 pos = 0;
763
764                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
765                 if (ret < 0)
766                         break;
767
768                 read += ret;
769                 ubuf += ret;
770                 cnt -= ret;
771
772                 if (!trace_parser_loaded(&parser))
773                         break;
774
775                 ret = -EINVAL;
776                 if (kstrtoul(parser.buffer, 0, &val))
777                         break;
778
779                 pid = (pid_t)val;
780
781                 if (trace_pid_list_set(pid_list, pid) < 0) {
782                         ret = -1;
783                         break;
784                 }
785                 nr_pids++;
786
787                 trace_parser_clear(&parser);
788                 ret = 0;
789         }
790         trace_parser_put(&parser);
791
792         if (ret < 0) {
793                 trace_pid_list_free(pid_list);
794                 return ret;
795         }
796
797         if (!nr_pids) {
798                 /* Cleared the list of pids */
799                 trace_pid_list_free(pid_list);
800                 pid_list = NULL;
801         }
802
803         *new_pid_list = pid_list;
804
805         return read;
806 }
807
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810         u64 ts;
811
812         /* Early boot up does not have a buffer yet */
813         if (!buf->buffer)
814                 return trace_clock_local();
815
816         ts = ring_buffer_time_stamp(buf->buffer);
817         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819         return ts;
820 }
821
822 u64 ftrace_now(int cpu)
823 {
824         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" so that it can be used in fast paths
832  * such as the irqsoff tracer, but it may be inaccurate due to races.
833  * If you need to know the accurate state, use tracing_is_on(), which
834  * is a little slower but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838         /*
839          * For quick access (irqsoff uses this in fast path), just
840          * return the mirror variable of the state of the ring buffer.
841          * It's a little racy, but we don't really care.
842          */
843         smp_rmb();
844         return !global_trace.buffer_disabled;
845 }
846
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low number of 16384.
853  * If a dump on oops happens, it is much appreciated not to have
854  * to wait for all that output. In any case, this is configurable
855  * at both boot time and run time.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer            *trace_types __read_mostly;
863
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868
869 /*
870  * Serialize access to the ring buffer.
871  *
872  * The ring buffer serializes readers, but that is only low-level protection.
873  * The validity of the events (as returned by ring_buffer_peek() etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow another process to
877  * consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not a reader page) in the ring buffer, and this page will be
880  *      rewritten by the events producer.
881  *   B) the page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to the system.
883  *
884  * These primitives allow multiple processes to access different per-CPU
885  * ring buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
895 static inline void trace_access_lock(int cpu)
896 {
897         if (cpu == RING_BUFFER_ALL_CPUS) {
898                 /* gain it for accessing the whole ring buffer. */
899                 down_write(&all_cpu_access_lock);
900         } else {
901                 /* gain it for accessing a cpu ring buffer. */
902
903                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904                 down_read(&all_cpu_access_lock);
905
906                 /* Secondly block other access to this @cpu ring buffer. */
907                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908         }
909 }
910
911 static inline void trace_access_unlock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 up_write(&all_cpu_access_lock);
915         } else {
916                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917                 up_read(&all_cpu_access_lock);
918         }
919 }
920
921 static inline void trace_access_lock_init(void)
922 {
923         int cpu;
924
925         for_each_possible_cpu(cpu)
926                 mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928
929 #else
930
931 static DEFINE_MUTEX(access_lock);
932
933 static inline void trace_access_lock(int cpu)
934 {
935         (void)cpu;
936         mutex_lock(&access_lock);
937 }
938
939 static inline void trace_access_unlock(int cpu)
940 {
941         (void)cpu;
942         mutex_unlock(&access_lock);
943 }
944
945 static inline void trace_access_lock_init(void)
946 {
947 }
948
949 #endif
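
/*
 * Typical usage pattern (illustrative, not a new API) for the helpers above
 * when consuming events from one CPU buffer or from all of them:
 *
 *	trace_access_lock(cpu);
 *	...read or consume events of @cpu (or RING_BUFFER_ALL_CPUS)...
 *	trace_access_unlock(cpu);
 */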
950
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953                                  unsigned int trace_ctx,
954                                  int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956                                       struct trace_buffer *buffer,
957                                       unsigned int trace_ctx,
958                                       int skip, struct pt_regs *regs);
959
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962                                         unsigned int trace_ctx,
963                                         int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967                                       struct trace_buffer *buffer,
968                                       unsigned long trace_ctx,
969                                       int skip, struct pt_regs *regs)
970 {
971 }
972
973 #endif
974
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977                   int type, unsigned int trace_ctx)
978 {
979         struct trace_entry *ent = ring_buffer_event_data(event);
980
981         tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986                           int type,
987                           unsigned long len,
988                           unsigned int trace_ctx)
989 {
990         struct ring_buffer_event *event;
991
992         event = ring_buffer_lock_reserve(buffer, len);
993         if (event != NULL)
994                 trace_event_setup(event, type, trace_ctx);
995
996         return event;
997 }
998
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001         if (tr->array_buffer.buffer)
1002                 ring_buffer_record_on(tr->array_buffer.buffer);
1003         /*
1004          * This flag is looked at when buffers haven't been allocated
1005          * yet, or by some tracers (like irqsoff) that just want to
1006          * know if the ring buffer has been disabled, but it can handle
1007          * races where it gets disabled while we still do a record.
1008          * As the check is in the fast path of the tracers, it is more
1009          * important to be fast than accurate.
1010          */
1011         tr->buffer_disabled = 0;
1012         /* Make the flag seen by readers */
1013         smp_wmb();
1014 }
1015
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024         tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032         __this_cpu_write(trace_taskinfo_save, true);
1033
1034         /* If this is the temp buffer, we need to commit fully */
1035         if (this_cpu_read(trace_buffered_event) == event) {
1036                 /* Length is in event->array[0] */
1037                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038                 /* Release the temp buffer */
1039                 this_cpu_dec(trace_buffered_event_cnt);
1040                 /* ring_buffer_unlock_commit() enables preemption */
1041                 preempt_enable_notrace();
1042         } else
1043                 ring_buffer_unlock_commit(buffer);
1044 }
1045
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047                        const char *str, int size)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct print_entry *entry;
1052         unsigned int trace_ctx;
1053         int alloc;
1054
1055         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056                 return 0;
1057
1058         if (unlikely(tracing_selftest_running && tr == &global_trace))
1059                 return 0;
1060
1061         if (unlikely(tracing_disabled))
1062                 return 0;
1063
1064         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066         trace_ctx = tracing_gen_ctx();
1067         buffer = tr->array_buffer.buffer;
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070                                             trace_ctx);
1071         if (!event) {
1072                 size = 0;
1073                 goto out;
1074         }
1075
1076         entry = ring_buffer_event_data(event);
1077         entry->ip = ip;
1078
1079         memcpy(&entry->buf, str, size);
1080
1081         /* Add a newline if necessary */
1082         if (entry->buf[size - 1] != '\n') {
1083                 entry->buf[size] = '\n';
1084                 entry->buf[size + 1] = '\0';
1085         } else
1086                 entry->buf[size] = '\0';
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091         ring_buffer_nest_end(buffer);
1092         return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:    The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104         return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
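
/*
 * Illustrative call (not from this file). Callers normally go through the
 * trace_puts() macro, which is expected to choose between __trace_puts()
 * and __trace_bputs() depending on the string; a direct call looks like:
 *
 *	__trace_puts(_THIS_IP_, "hello world\n", 12);
 */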
1107
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:    The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115         struct ring_buffer_event *event;
1116         struct trace_buffer *buffer;
1117         struct bputs_entry *entry;
1118         unsigned int trace_ctx;
1119         int size = sizeof(struct bputs_entry);
1120         int ret = 0;
1121
1122         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123                 return 0;
1124
1125         if (unlikely(tracing_selftest_running || tracing_disabled))
1126                 return 0;
1127
1128         trace_ctx = tracing_gen_ctx();
1129         buffer = global_trace.array_buffer.buffer;
1130
1131         ring_buffer_nest_start(buffer);
1132         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133                                             trace_ctx);
1134         if (!event)
1135                 goto out;
1136
1137         entry = ring_buffer_event_data(event);
1138         entry->ip                       = ip;
1139         entry->str                      = str;
1140
1141         __buffer_unlock_commit(buffer, event);
1142         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144         ret = 1;
1145  out:
1146         ring_buffer_nest_end(buffer);
1147         return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153                                            void *cond_data)
1154 {
1155         struct tracer *tracer = tr->current_trace;
1156         unsigned long flags;
1157
1158         if (in_nmi()) {
1159                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161                 return;
1162         }
1163
1164         if (!tr->allocated_snapshot) {
1165                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167                 tracer_tracing_off(tr);
1168                 return;
1169         }
1170
1171         /* Note, the snapshot cannot be used when the tracer uses it */
1172         if (tracer->use_max_tr) {
1173                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175                 return;
1176         }
1177
1178         local_irq_save(flags);
1179         update_max_tr(tr, current, smp_processor_id(), cond_data);
1180         local_irq_restore(flags);
1181 }
1182
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185         tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot either with
1196  * tracing_snapshot_alloc(), or manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, this will stop tracing,
1200  * basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204         struct trace_array *tr = &global_trace;
1205
1206         tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:         The tracing instance to snapshot
1213  * @cond_data:  The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225         tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:         The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already taken.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245         void *cond_data = NULL;
1246
1247         local_irq_disable();
1248         arch_spin_lock(&tr->max_lock);
1249
1250         if (tr->cond_snapshot)
1251                 cond_data = tr->cond_snapshot->cond_data;
1252
1253         arch_spin_unlock(&tr->max_lock);
1254         local_irq_enable();
1255
1256         return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261                                         struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266         int ret;
1267
1268         if (!tr->allocated_snapshot) {
1269
1270                 /* allocate spare buffer */
1271                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1272                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273                 if (ret < 0)
1274                         return ret;
1275
1276                 tr->allocated_snapshot = true;
1277         }
1278
1279         return 0;
1280 }
1281
1282 static void free_snapshot(struct trace_array *tr)
1283 {
1284         /*
1285          * We don't free the ring buffer; instead, we resize it because
1286          * the max_tr ring buffer has some state (e.g. ring->clock) and
1287          * we want to preserve it.
1288          */
1289         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1290         set_buffer_entries(&tr->max_buffer, 1);
1291         tracing_reset_online_cpus(&tr->max_buffer);
1292         tr->allocated_snapshot = false;
1293 }
1294
1295 /**
1296  * tracing_alloc_snapshot - allocate snapshot buffer.
1297  *
1298  * This only allocates the snapshot buffer if it isn't already
1299  * allocated - it doesn't also take a snapshot.
1300  *
1301  * This is meant to be used in cases where the snapshot buffer needs
1302  * to be set up for events that can't sleep but need to be able to
1303  * trigger a snapshot.
1304  */
1305 int tracing_alloc_snapshot(void)
1306 {
1307         struct trace_array *tr = &global_trace;
1308         int ret;
1309
1310         ret = tracing_alloc_snapshot_instance(tr);
1311         WARN_ON(ret < 0);
1312
1313         return ret;
1314 }
1315 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316
1317 /**
1318  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319  *
1320  * This is similar to tracing_snapshot(), but it will allocate the
1321  * snapshot buffer if it isn't already allocated. Use this only
1322  * where it is safe to sleep, as the allocation may sleep.
1323  *
1324  * This causes a swap between the snapshot buffer and the current live
1325  * tracing buffer. You can use this to take snapshots of the live
1326  * trace when some condition is triggered, but continue to trace.
1327  */
1328 void tracing_snapshot_alloc(void)
1329 {
1330         int ret;
1331
1332         ret = tracing_alloc_snapshot();
1333         if (ret < 0)
1334                 return;
1335
1336         tracing_snapshot();
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
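
/*
 * Illustrative usage (not part of this file): allocate the spare buffer
 * once from a context that may sleep, then take snapshots later from
 * contexts that cannot:
 *
 *	tracing_alloc_snapshot();	(may sleep)
 *	...
 *	tracing_snapshot();		(does not sleep; refused from NMI)
 */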
1339
1340 /**
1341  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342  * @tr:         The tracing instance
1343  * @cond_data:  User data to associate with the snapshot
1344  * @update:     Implementation of the cond_snapshot update function
1345  *
1346  * Check whether the conditional snapshot for the given instance has
1347  * already been enabled, or if the current tracer is already using a
1348  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349  * save the cond_data and update function inside.
1350  *
1351  * Returns 0 if successful, error otherwise.
1352  */
1353 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354                                  cond_update_fn_t update)
1355 {
1356         struct cond_snapshot *cond_snapshot;
1357         int ret = 0;
1358
1359         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360         if (!cond_snapshot)
1361                 return -ENOMEM;
1362
1363         cond_snapshot->cond_data = cond_data;
1364         cond_snapshot->update = update;
1365
1366         mutex_lock(&trace_types_lock);
1367
1368         ret = tracing_alloc_snapshot_instance(tr);
1369         if (ret)
1370                 goto fail_unlock;
1371
1372         if (tr->current_trace->use_max_tr) {
1373                 ret = -EBUSY;
1374                 goto fail_unlock;
1375         }
1376
1377         /*
1378          * The cond_snapshot can only change to NULL without the
1379          * trace_types_lock. We don't care if we race with it going
1380          * to NULL, but we want to make sure that it's not set to
1381          * something other than NULL when we get here, which we can
1382          * do safely with only holding the trace_types_lock and not
1383          * having to take the max_lock.
1384          */
1385         if (tr->cond_snapshot) {
1386                 ret = -EBUSY;
1387                 goto fail_unlock;
1388         }
1389
1390         local_irq_disable();
1391         arch_spin_lock(&tr->max_lock);
1392         tr->cond_snapshot = cond_snapshot;
1393         arch_spin_unlock(&tr->max_lock);
1394         local_irq_enable();
1395
1396         mutex_unlock(&trace_types_lock);
1397
1398         return ret;
1399
1400  fail_unlock:
1401         mutex_unlock(&trace_types_lock);
1402         kfree(cond_snapshot);
1403         return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406
1407 /**
1408  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409  * @tr:         The tracing instance
1410  *
1411  * Check whether the conditional snapshot for the given instance is
1412  * enabled; if so, free the cond_snapshot associated with it,
1413  * otherwise return -EINVAL.
1414  *
1415  * Returns 0 if successful, error otherwise.
1416  */
1417 int tracing_snapshot_cond_disable(struct trace_array *tr)
1418 {
1419         int ret = 0;
1420
1421         local_irq_disable();
1422         arch_spin_lock(&tr->max_lock);
1423
1424         if (!tr->cond_snapshot)
1425                 ret = -EINVAL;
1426         else {
1427                 kfree(tr->cond_snapshot);
1428                 tr->cond_snapshot = NULL;
1429         }
1430
1431         arch_spin_unlock(&tr->max_lock);
1432         local_irq_enable();
1433
1434         return ret;
1435 }
1436 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
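
/*
 * Illustrative sketch (not part of this file) of a conditional-snapshot
 * user. "my_update", "my_threshold" and some_measured_value() are
 * hypothetical; the callback prototype follows cond_update_fn_t:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		unsigned long *threshold = cond_data;
 *
 *		return some_measured_value() > *threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_threshold, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_threshold);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */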
1437 #else
1438 void tracing_snapshot(void)
1439 {
1440         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441 }
1442 EXPORT_SYMBOL_GPL(tracing_snapshot);
1443 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444 {
1445         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1448 int tracing_alloc_snapshot(void)
1449 {
1450         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451         return -ENODEV;
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1454 void tracing_snapshot_alloc(void)
1455 {
1456         /* Give warning */
1457         tracing_snapshot();
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1460 void *tracing_cond_snapshot_data(struct trace_array *tr)
1461 {
1462         return NULL;
1463 }
1464 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1465 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466 {
1467         return -ENODEV;
1468 }
1469 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1470 int tracing_snapshot_cond_disable(struct trace_array *tr)
1471 {
1472         return false;
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475 #define free_snapshot(tr)       do { } while (0)
1476 #endif /* CONFIG_TRACER_SNAPSHOT */
1477
1478 void tracer_tracing_off(struct trace_array *tr)
1479 {
1480         if (tr->array_buffer.buffer)
1481                 ring_buffer_record_off(tr->array_buffer.buffer);
1482         /*
1483          * This flag is looked at when buffers haven't been allocated
1484          * yet, or by some tracers (like irqsoff) that just want to
1485          * know if the ring buffer has been disabled, but it can handle
1486          * races where it gets disabled while we still do a record.
1487          * As the check is in the fast path of the tracers, it is more
1488          * important to be fast than accurate.
1489          */
1490         tr->buffer_disabled = 1;
1491         /* Make the flag seen by readers */
1492         smp_wmb();
1493 }
1494
1495 /**
1496  * tracing_off - turn off tracing buffers
1497  *
1498  * This function stops the tracing buffers from recording data.
1499  * It does not disable any overhead the tracers themselves may
1500  * be causing. This function simply causes all recording to
1501  * the ring buffers to fail.
1502  */
1503 void tracing_off(void)
1504 {
1505         tracer_tracing_off(&global_trace);
1506 }
1507 EXPORT_SYMBOL_GPL(tracing_off);
1508
1509 void disable_trace_on_warning(void)
1510 {
1511         if (__disable_trace_on_warning) {
1512                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1513                         "Disabling tracing due to warning\n");
1514                 tracing_off();
1515         }
1516 }
1517
1518 /**
1519  * tracer_tracing_is_on - show real state of ring buffer enabled
1520  * @tr : the trace array to know if ring buffer is enabled
1521  *
1522  * Shows real state of the ring buffer if it is enabled or not.
1523  */
1524 bool tracer_tracing_is_on(struct trace_array *tr)
1525 {
1526         if (tr->array_buffer.buffer)
1527                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528         return !tr->buffer_disabled;
1529 }
1530
1531 /**
1532  * tracing_is_on - show state of ring buffers enabled
1533  */
1534 int tracing_is_on(void)
1535 {
1536         return tracer_tracing_is_on(&global_trace);
1537 }
1538 EXPORT_SYMBOL_GPL(tracing_is_on);
1539
1540 static int __init set_buf_size(char *str)
1541 {
1542         unsigned long buf_size;
1543
1544         if (!str)
1545                 return 0;
1546         buf_size = memparse(str, &str);
1547         /*
1548          * nr_entries cannot be zero and the startup
1549          * tests require some buffer space. Therefore
1550          * ensure we have at least 4096 bytes of buffer.
1551          */
1552         trace_buf_size = max(4096UL, buf_size);
1553         return 1;
1554 }
1555 __setup("trace_buf_size=", set_buf_size);
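
/*
 * Illustrative boot-parameter examples (not part of this file); memparse()
 * above accepts plain byte counts as well as K/M/G suffixes:
 *
 *	trace_buf_size=1441792
 *	trace_buf_size=1M
 */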
1556
1557 static int __init set_tracing_thresh(char *str)
1558 {
1559         unsigned long threshold;
1560         int ret;
1561
1562         if (!str)
1563                 return 0;
1564         ret = kstrtoul(str, 0, &threshold);
1565         if (ret < 0)
1566                 return 0;
1567         tracing_thresh = threshold * 1000;
1568         return 1;
1569 }
1570 __setup("tracing_thresh=", set_tracing_thresh);
1571
1572 unsigned long nsecs_to_usecs(unsigned long nsecs)
1573 {
1574         return nsecs / 1000;
1575 }
1576
1577 /*
1578  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581  * of strings in the order that the evals (enum) were defined.
1582  */
1583 #undef C
1584 #define C(a, b) b
1585
1586 /* These must match the bit positions in trace_iterator_flags */
1587 static const char *trace_options[] = {
1588         TRACE_FLAGS
1589         NULL
1590 };
1591
1592 static struct {
1593         u64 (*func)(void);
1594         const char *name;
1595         int in_ns;              /* is this clock in nanoseconds? */
1596 } trace_clocks[] = {
1597         { trace_clock_local,            "local",        1 },
1598         { trace_clock_global,           "global",       1 },
1599         { trace_clock_counter,          "counter",      0 },
1600         { trace_clock_jiffies,          "uptime",       0 },
1601         { trace_clock,                  "perf",         1 },
1602         { ktime_get_mono_fast_ns,       "mono",         1 },
1603         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1604         { ktime_get_boot_fast_ns,       "boot",         1 },
1605         { ktime_get_tai_fast_ns,        "tai",          1 },
1606         ARCH_TRACE_CLOCKS
1607 };
1608
1609 bool trace_clock_in_ns(struct trace_array *tr)
1610 {
1611         if (trace_clocks[tr->clock_id].in_ns)
1612                 return true;
1613
1614         return false;
1615 }
1616
1617 /*
1618  * trace_parser_get_init - gets the buffer for trace parser
1619  */
1620 int trace_parser_get_init(struct trace_parser *parser, int size)
1621 {
1622         memset(parser, 0, sizeof(*parser));
1623
1624         parser->buffer = kmalloc(size, GFP_KERNEL);
1625         if (!parser->buffer)
1626                 return 1;
1627
1628         parser->size = size;
1629         return 0;
1630 }
1631
1632 /*
1633  * trace_parser_put - frees the buffer for trace parser
1634  */
1635 void trace_parser_put(struct trace_parser *parser)
1636 {
1637         kfree(parser->buffer);
1638         parser->buffer = NULL;
1639 }
1640
1641 /*
1642  * trace_get_user - reads the user input string separated by  space
1643  * (matched by isspace(ch))
1644  *
1645  * For each string found the 'struct trace_parser' is updated,
1646  * and the function returns.
1647  *
1648  * Returns number of bytes read.
1649  *
1650  * See kernel/trace/trace.h for 'struct trace_parser' details.
1651  */
1652 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653         size_t cnt, loff_t *ppos)
1654 {
1655         char ch;
1656         size_t read = 0;
1657         ssize_t ret;
1658
1659         if (!*ppos)
1660                 trace_parser_clear(parser);
1661
1662         ret = get_user(ch, ubuf++);
1663         if (ret)
1664                 goto out;
1665
1666         read++;
1667         cnt--;
1668
1669         /*
1670          * If the parser did not finish with the last write,
1671          * continue reading the user input without skipping spaces.
1672          */
1673         if (!parser->cont) {
1674                 /* skip white space */
1675                 while (cnt && isspace(ch)) {
1676                         ret = get_user(ch, ubuf++);
1677                         if (ret)
1678                                 goto out;
1679                         read++;
1680                         cnt--;
1681                 }
1682
1683                 parser->idx = 0;
1684
1685                 /* only spaces were written */
1686                 if (isspace(ch) || !ch) {
1687                         *ppos += read;
1688                         ret = read;
1689                         goto out;
1690                 }
1691         }
1692
1693         /* read the non-space input */
1694         while (cnt && !isspace(ch) && ch) {
1695                 if (parser->idx < parser->size - 1)
1696                         parser->buffer[parser->idx++] = ch;
1697                 else {
1698                         ret = -EINVAL;
1699                         goto out;
1700                 }
1701                 ret = get_user(ch, ubuf++);
1702                 if (ret)
1703                         goto out;
1704                 read++;
1705                 cnt--;
1706         }
1707
1708         /* We either got finished input or we have to wait for another call. */
1709         if (isspace(ch) || !ch) {
1710                 parser->buffer[parser->idx] = 0;
1711                 parser->cont = false;
1712         } else if (parser->idx < parser->size - 1) {
1713                 parser->cont = true;
1714                 parser->buffer[parser->idx++] = ch;
1715                 /* Make sure the parsed string always terminates with '\0'. */
1716                 parser->buffer[parser->idx] = 0;
1717         } else {
1718                 ret = -EINVAL;
1719                 goto out;
1720         }
1721
1722         *ppos += read;
1723         ret = read;
1724
1725 out:
1726         return ret;
1727 }
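
/*
 * Worked example (editorial illustration): if userspace writes "foo bar"
 * in one go, the first call consumes "foo " and returns with
 * parser->buffer holding "foo" and parser->cont == false; the caller uses
 * that token and calls trace_get_user() again, which then reads "bar".
 * If a write ends in the middle of a token (no trailing space or
 * newline), parser->cont is set so the next call appends to the same
 * token instead of skipping leading spaces.
 */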
1728
1729 /* TODO add a seq_buf_to_buffer() */
1730 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731 {
1732         int len;
1733
1734         if (trace_seq_used(s) <= s->readpos)
1735                 return -EBUSY;
1736
1737         len = trace_seq_used(s) - s->readpos;
1738         if (cnt > len)
1739                 cnt = len;
1740         memcpy(buf, s->buffer + s->readpos, cnt);
1741
1742         s->readpos += cnt;
1743         return cnt;
1744 }
1745
1746 unsigned long __read_mostly     tracing_thresh;
1747
1748 #ifdef CONFIG_TRACER_MAX_TRACE
1749 static const struct file_operations tracing_max_lat_fops;
1750
1751 #ifdef LATENCY_FS_NOTIFY
1752
1753 static struct workqueue_struct *fsnotify_wq;
1754
1755 static void latency_fsnotify_workfn(struct work_struct *work)
1756 {
1757         struct trace_array *tr = container_of(work, struct trace_array,
1758                                               fsnotify_work);
1759         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760 }
1761
1762 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763 {
1764         struct trace_array *tr = container_of(iwork, struct trace_array,
1765                                               fsnotify_irqwork);
1766         queue_work(fsnotify_wq, &tr->fsnotify_work);
1767 }
1768
1769 static void trace_create_maxlat_file(struct trace_array *tr,
1770                                      struct dentry *d_tracer)
1771 {
1772         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1773         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1774         tr->d_max_latency = trace_create_file("tracing_max_latency",
1775                                               TRACE_MODE_WRITE,
1776                                               d_tracer, tr,
1777                                               &tracing_max_lat_fops);
1778 }
1779
1780 __init static int latency_fsnotify_init(void)
1781 {
1782         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1783                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1784         if (!fsnotify_wq) {
1785                 pr_err("Unable to allocate tr_max_lat_wq\n");
1786                 return -ENOMEM;
1787         }
1788         return 0;
1789 }
1790
1791 late_initcall_sync(latency_fsnotify_init);
1792
1793 void latency_fsnotify(struct trace_array *tr)
1794 {
1795         if (!fsnotify_wq)
1796                 return;
1797         /*
1798          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799          * possible that we are called from __schedule() or do_idle(), which
1800          * could cause a deadlock.
1801          */
1802         irq_work_queue(&tr->fsnotify_irqwork);
1803 }
1804
1805 #else /* !LATENCY_FS_NOTIFY */
1806
1807 #define trace_create_maxlat_file(tr, d_tracer)                          \
1808         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1809                           d_tracer, tr, &tracing_max_lat_fops)
1810
1811 #endif
1812
1813 /*
1814  * Copy the new maximum trace into the separate maximum-trace
1815  * structure. (this way the maximum trace is permanently saved,
1816  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817  */
1818 static void
1819 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820 {
1821         struct array_buffer *trace_buf = &tr->array_buffer;
1822         struct array_buffer *max_buf = &tr->max_buffer;
1823         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825
1826         max_buf->cpu = cpu;
1827         max_buf->time_start = data->preempt_timestamp;
1828
1829         max_data->saved_latency = tr->max_latency;
1830         max_data->critical_start = data->critical_start;
1831         max_data->critical_end = data->critical_end;
1832
1833         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834         max_data->pid = tsk->pid;
1835         /*
1836          * If tsk == current, then use current_uid(), as that does not use
1837          * RCU. The irq tracer can be called out of RCU scope.
1838          */
1839         if (tsk == current)
1840                 max_data->uid = current_uid();
1841         else
1842                 max_data->uid = task_uid(tsk);
1843
1844         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845         max_data->policy = tsk->policy;
1846         max_data->rt_priority = tsk->rt_priority;
1847
1848         /* record this task's comm */
1849         tracing_record_cmdline(tsk);
1850         latency_fsnotify(tr);
1851 }
1852
1853 /**
1854  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855  * @tr: trace array to take the snapshot of
1856  * @tsk: the task with the latency
1857  * @cpu: The cpu that initiated the trace.
1858  * @cond_data: User data associated with a conditional snapshot
1859  *
1860  * Flip the buffers between the @tr and the max_tr and record information
1861  * about which task was the cause of this latency.
1862  */
1863 void
1864 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865               void *cond_data)
1866 {
1867         if (tr->stop_count)
1868                 return;
1869
1870         WARN_ON_ONCE(!irqs_disabled());
1871
1872         if (!tr->allocated_snapshot) {
1873                 /* Only the nop tracer should hit this when disabling */
1874                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875                 return;
1876         }
1877
1878         arch_spin_lock(&tr->max_lock);
1879
1880         /* Inherit the recordable setting from array_buffer */
1881         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1882                 ring_buffer_record_on(tr->max_buffer.buffer);
1883         else
1884                 ring_buffer_record_off(tr->max_buffer.buffer);
1885
1886 #ifdef CONFIG_TRACER_SNAPSHOT
1887         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888                 arch_spin_unlock(&tr->max_lock);
1889                 return;
1890         }
1891 #endif
1892         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893
1894         __update_max_tr(tr, tsk, cpu);
1895
1896         arch_spin_unlock(&tr->max_lock);
1897 }
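
/*
 * Usage note (editorial illustration): after a latency tracer such as
 * irqsoff or wakeup calls update_max_tr(), the result can be inspected
 * from userspace, e.g.:
 *
 *	cat /sys/kernel/tracing/tracing_max_latency
 *
 * which reports the saved maximum latency, while the swapped-in max
 * buffer holds the trace that produced it.
 */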
1898
1899 /**
1900  * update_max_tr_single - only copy one trace over, and reset the rest
1901  * @tr: trace array to take the single-CPU snapshot of
1902  * @tsk: task with the latency
1903  * @cpu: the cpu of the buffer to copy.
1904  *
1905  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1906  */
1907 void
1908 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1909 {
1910         int ret;
1911
1912         if (tr->stop_count)
1913                 return;
1914
1915         WARN_ON_ONCE(!irqs_disabled());
1916         if (!tr->allocated_snapshot) {
1917                 /* Only the nop tracer should hit this when disabling */
1918                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1919                 return;
1920         }
1921
1922         arch_spin_lock(&tr->max_lock);
1923
1924         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1925
1926         if (ret == -EBUSY) {
1927                 /*
1928                  * We failed to swap the buffer due to a commit taking
1929                  * place on this CPU. We fail to record, but we reset
1930                  * the max trace buffer (no one writes directly to it)
1931                  * and flag that it failed.
1932                  * Another possible reason is that a resize is in progress.
1933                  */
1934                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1935                         "Failed to swap buffers due to commit or resize in progress\n");
1936         }
1937
1938         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1939
1940         __update_max_tr(tr, tsk, cpu);
1941         arch_spin_unlock(&tr->max_lock);
1942 }
1943
1944 #endif /* CONFIG_TRACER_MAX_TRACE */
1945
1946 static int wait_on_pipe(struct trace_iterator *iter, int full)
1947 {
1948         /* Iterators are static, they should be filled or empty */
1949         if (trace_buffer_iter(iter, iter->cpu_file))
1950                 return 0;
1951
1952         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1953                                 full);
1954 }
1955
1956 #ifdef CONFIG_FTRACE_STARTUP_TEST
1957 static bool selftests_can_run;
1958
1959 struct trace_selftests {
1960         struct list_head                list;
1961         struct tracer                   *type;
1962 };
1963
1964 static LIST_HEAD(postponed_selftests);
1965
1966 static int save_selftest(struct tracer *type)
1967 {
1968         struct trace_selftests *selftest;
1969
1970         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1971         if (!selftest)
1972                 return -ENOMEM;
1973
1974         selftest->type = type;
1975         list_add(&selftest->list, &postponed_selftests);
1976         return 0;
1977 }
1978
1979 static int run_tracer_selftest(struct tracer *type)
1980 {
1981         struct trace_array *tr = &global_trace;
1982         struct tracer *saved_tracer = tr->current_trace;
1983         int ret;
1984
1985         if (!type->selftest || tracing_selftest_disabled)
1986                 return 0;
1987
1988         /*
1989          * If a tracer registers early in boot up (before scheduling is
1990          * initialized and such), then do not run its selftests yet.
1991          * Instead, run it a little later in the boot process.
1992          */
1993         if (!selftests_can_run)
1994                 return save_selftest(type);
1995
1996         if (!tracing_is_on()) {
1997                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1998                         type->name);
1999                 return 0;
2000         }
2001
2002         /*
2003          * Run a selftest on this tracer.
2004          * Here we reset the trace buffer, and set the current
2005          * tracer to be this tracer. The tracer can then run some
2006          * internal tracing to verify that everything is in order.
2007          * If we fail, we do not register this tracer.
2008          */
2009         tracing_reset_online_cpus(&tr->array_buffer);
2010
2011         tr->current_trace = type;
2012
2013 #ifdef CONFIG_TRACER_MAX_TRACE
2014         if (type->use_max_tr) {
2015                 /* If we expanded the buffers, make sure the max is expanded too */
2016                 if (tr->ring_buffer_expanded)
2017                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2018                                            RING_BUFFER_ALL_CPUS);
2019                 tr->allocated_snapshot = true;
2020         }
2021 #endif
2022
2023         /* the test is responsible for initializing and enabling */
2024         pr_info("Testing tracer %s: ", type->name);
2025         ret = type->selftest(type, tr);
2026         /* the test is responsible for resetting too */
2027         tr->current_trace = saved_tracer;
2028         if (ret) {
2029                 printk(KERN_CONT "FAILED!\n");
2030                 /* Add the warning after printing 'FAILED' */
2031                 WARN_ON(1);
2032                 return -1;
2033         }
2034         /* Only reset on passing, to avoid touching corrupted buffers */
2035         tracing_reset_online_cpus(&tr->array_buffer);
2036
2037 #ifdef CONFIG_TRACER_MAX_TRACE
2038         if (type->use_max_tr) {
2039                 tr->allocated_snapshot = false;
2040
2041                 /* Shrink the max buffer again */
2042                 if (tr->ring_buffer_expanded)
2043                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2044                                            RING_BUFFER_ALL_CPUS);
2045         }
2046 #endif
2047
2048         printk(KERN_CONT "PASSED\n");
2049         return 0;
2050 }
2051
2052 static int do_run_tracer_selftest(struct tracer *type)
2053 {
2054         int ret;
2055
2056         /*
2057          * Tests can take a long time, especially if they are run one after the
2058          * other, as does happen during bootup when all the tracers are
2059          * registered. This could cause the soft lockup watchdog to trigger.
2060          */
2061         cond_resched();
2062
2063         tracing_selftest_running = true;
2064         ret = run_tracer_selftest(type);
2065         tracing_selftest_running = false;
2066
2067         return ret;
2068 }
2069
2070 static __init int init_trace_selftests(void)
2071 {
2072         struct trace_selftests *p, *n;
2073         struct tracer *t, **last;
2074         int ret;
2075
2076         selftests_can_run = true;
2077
2078         mutex_lock(&trace_types_lock);
2079
2080         if (list_empty(&postponed_selftests))
2081                 goto out;
2082
2083         pr_info("Running postponed tracer tests:\n");
2084
2085         tracing_selftest_running = true;
2086         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2087                 /* This loop can take minutes when sanitizers are enabled, so
2088                  * let's make sure we allow RCU processing.
2089                  */
2090                 cond_resched();
2091                 ret = run_tracer_selftest(p->type);
2092                 /* If the test fails, then warn and remove from available_tracers */
2093                 if (ret < 0) {
2094                         WARN(1, "tracer: %s failed selftest, disabling\n",
2095                              p->type->name);
2096                         last = &trace_types;
2097                         for (t = trace_types; t; t = t->next) {
2098                                 if (t == p->type) {
2099                                         *last = t->next;
2100                                         break;
2101                                 }
2102                                 last = &t->next;
2103                         }
2104                 }
2105                 list_del(&p->list);
2106                 kfree(p);
2107         }
2108         tracing_selftest_running = false;
2109
2110  out:
2111         mutex_unlock(&trace_types_lock);
2112
2113         return 0;
2114 }
2115 core_initcall(init_trace_selftests);
2116 #else
2117 static inline int run_tracer_selftest(struct tracer *type)
2118 {
2119         return 0;
2120 }
2121 static inline int do_run_tracer_selftest(struct tracer *type)
2122 {
2123         return 0;
2124 }
2125 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2126
2127 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2128
2129 static void __init apply_trace_boot_options(void);
2130
2131 /**
2132  * register_tracer - register a tracer with the ftrace system.
2133  * @type: the plugin for the tracer
2134  *
2135  * Register a new plugin tracer.
2136  */
2137 int __init register_tracer(struct tracer *type)
2138 {
2139         struct tracer *t;
2140         int ret = 0;
2141
2142         if (!type->name) {
2143                 pr_info("Tracer must have a name\n");
2144                 return -1;
2145         }
2146
2147         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2148                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2149                 return -1;
2150         }
2151
2152         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2153                 pr_warn("Can not register tracer %s due to lockdown\n",
2154                            type->name);
2155                 return -EPERM;
2156         }
2157
2158         mutex_lock(&trace_types_lock);
2159
2160         for (t = trace_types; t; t = t->next) {
2161                 if (strcmp(type->name, t->name) == 0) {
2162                         /* already found */
2163                         pr_info("Tracer %s already registered\n",
2164                                 type->name);
2165                         ret = -1;
2166                         goto out;
2167                 }
2168         }
2169
2170         if (!type->set_flag)
2171                 type->set_flag = &dummy_set_flag;
2172         if (!type->flags) {
2173                 /* allocate a dummy tracer_flags */
2174                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2175                 if (!type->flags) {
2176                         ret = -ENOMEM;
2177                         goto out;
2178                 }
2179                 type->flags->val = 0;
2180                 type->flags->opts = dummy_tracer_opt;
2181         } else
2182                 if (!type->flags->opts)
2183                         type->flags->opts = dummy_tracer_opt;
2184
2185         /* store the tracer for __set_tracer_option */
2186         type->flags->trace = type;
2187
2188         ret = do_run_tracer_selftest(type);
2189         if (ret < 0)
2190                 goto out;
2191
2192         type->next = trace_types;
2193         trace_types = type;
2194         add_tracer_options(&global_trace, type);
2195
2196  out:
2197         mutex_unlock(&trace_types_lock);
2198
2199         if (ret || !default_bootup_tracer)
2200                 goto out_unlock;
2201
2202         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2203                 goto out_unlock;
2204
2205         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2206         /* Do we want this tracer to start on bootup? */
2207         tracing_set_tracer(&global_trace, type->name);
2208         default_bootup_tracer = NULL;
2209
2210         apply_trace_boot_options();
2211
2212         /* Disable other selftests, since this tracer will break them. */
2213         disable_tracing_selftest("running a tracer");
2214
2215  out_unlock:
2216         return ret;
2217 }
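
/*
 * Registration sketch (editorial illustration; "my_tracer" and its
 * callbacks are hypothetical, not defined anywhere in this file): a
 * tracer plugin typically provides a struct tracer and registers it from
 * an init call:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 *
 * Once registered, the name shows up in available_tracers and can be
 * written to current_tracer.
 */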
2218
2219 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2220 {
2221         struct trace_buffer *buffer = buf->buffer;
2222
2223         if (!buffer)
2224                 return;
2225
2226         ring_buffer_record_disable(buffer);
2227
2228         /* Make sure all commits have finished */
2229         synchronize_rcu();
2230         ring_buffer_reset_cpu(buffer, cpu);
2231
2232         ring_buffer_record_enable(buffer);
2233 }
2234
2235 void tracing_reset_online_cpus(struct array_buffer *buf)
2236 {
2237         struct trace_buffer *buffer = buf->buffer;
2238
2239         if (!buffer)
2240                 return;
2241
2242         ring_buffer_record_disable(buffer);
2243
2244         /* Make sure all commits have finished */
2245         synchronize_rcu();
2246
2247         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2248
2249         ring_buffer_reset_online_cpus(buffer);
2250
2251         ring_buffer_record_enable(buffer);
2252 }
2253
2254 /* Must have trace_types_lock held */
2255 void tracing_reset_all_online_cpus_unlocked(void)
2256 {
2257         struct trace_array *tr;
2258
2259         lockdep_assert_held(&trace_types_lock);
2260
2261         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2262                 if (!tr->clear_trace)
2263                         continue;
2264                 tr->clear_trace = false;
2265                 tracing_reset_online_cpus(&tr->array_buffer);
2266 #ifdef CONFIG_TRACER_MAX_TRACE
2267                 tracing_reset_online_cpus(&tr->max_buffer);
2268 #endif
2269         }
2270 }
2271
2272 void tracing_reset_all_online_cpus(void)
2273 {
2274         mutex_lock(&trace_types_lock);
2275         tracing_reset_all_online_cpus_unlocked();
2276         mutex_unlock(&trace_types_lock);
2277 }
2278
2279 /*
2280  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2281  * is the tgid last observed corresponding to pid=i.
2282  */
2283 static int *tgid_map;
2284
2285 /* The maximum valid index into tgid_map. */
2286 static size_t tgid_map_max;
2287
2288 #define SAVED_CMDLINES_DEFAULT 128
2289 #define NO_CMDLINE_MAP UINT_MAX
2290 /*
2291  * Preemption must be disabled before acquiring trace_cmdline_lock.
2292  * The various trace_arrays' max_lock must be acquired in a context
2293  * where interrupts are disabled.
2294  */
2295 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2296 struct saved_cmdlines_buffer {
2297         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2298         unsigned *map_cmdline_to_pid;
2299         unsigned cmdline_num;
2300         int cmdline_idx;
2301         char *saved_cmdlines;
2302 };
2303 static struct saved_cmdlines_buffer *savedcmd;
2304
2305 static inline char *get_saved_cmdlines(int idx)
2306 {
2307         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2308 }
2309
2310 static inline void set_cmdline(int idx, const char *cmdline)
2311 {
2312         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2313 }
2314
2315 static int allocate_cmdlines_buffer(unsigned int val,
2316                                     struct saved_cmdlines_buffer *s)
2317 {
2318         s->map_cmdline_to_pid = kmalloc_array(val,
2319                                               sizeof(*s->map_cmdline_to_pid),
2320                                               GFP_KERNEL);
2321         if (!s->map_cmdline_to_pid)
2322                 return -ENOMEM;
2323
2324         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2325         if (!s->saved_cmdlines) {
2326                 kfree(s->map_cmdline_to_pid);
2327                 return -ENOMEM;
2328         }
2329
2330         s->cmdline_idx = 0;
2331         s->cmdline_num = val;
2332         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2333                sizeof(s->map_pid_to_cmdline));
2334         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2335                val * sizeof(*s->map_cmdline_to_pid));
2336
2337         return 0;
2338 }
2339
2340 static int trace_create_savedcmd(void)
2341 {
2342         int ret;
2343
2344         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2345         if (!savedcmd)
2346                 return -ENOMEM;
2347
2348         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2349         if (ret < 0) {
2350                 kfree(savedcmd);
2351                 savedcmd = NULL;
2352                 return -ENOMEM;
2353         }
2354
2355         return 0;
2356 }
2357
2358 int is_tracing_stopped(void)
2359 {
2360         return global_trace.stop_count;
2361 }
2362
2363 /**
2364  * tracing_start - quick start of the tracer
2365  *
2366  * If tracing is enabled but was stopped by tracing_stop,
2367  * this will start the tracer back up.
2368  */
2369 void tracing_start(void)
2370 {
2371         struct trace_buffer *buffer;
2372         unsigned long flags;
2373
2374         if (tracing_disabled)
2375                 return;
2376
2377         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2378         if (--global_trace.stop_count) {
2379                 if (global_trace.stop_count < 0) {
2380                         /* Someone screwed up their debugging */
2381                         WARN_ON_ONCE(1);
2382                         global_trace.stop_count = 0;
2383                 }
2384                 goto out;
2385         }
2386
2387         /* Prevent the buffers from switching */
2388         arch_spin_lock(&global_trace.max_lock);
2389
2390         buffer = global_trace.array_buffer.buffer;
2391         if (buffer)
2392                 ring_buffer_record_enable(buffer);
2393
2394 #ifdef CONFIG_TRACER_MAX_TRACE
2395         buffer = global_trace.max_buffer.buffer;
2396         if (buffer)
2397                 ring_buffer_record_enable(buffer);
2398 #endif
2399
2400         arch_spin_unlock(&global_trace.max_lock);
2401
2402  out:
2403         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2404 }
2405
2406 static void tracing_start_tr(struct trace_array *tr)
2407 {
2408         struct trace_buffer *buffer;
2409         unsigned long flags;
2410
2411         if (tracing_disabled)
2412                 return;
2413
2414         /* If global, we need to also start the max tracer */
2415         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2416                 return tracing_start();
2417
2418         raw_spin_lock_irqsave(&tr->start_lock, flags);
2419
2420         if (--tr->stop_count) {
2421                 if (tr->stop_count < 0) {
2422                         /* Someone screwed up their debugging */
2423                         WARN_ON_ONCE(1);
2424                         tr->stop_count = 0;
2425                 }
2426                 goto out;
2427         }
2428
2429         buffer = tr->array_buffer.buffer;
2430         if (buffer)
2431                 ring_buffer_record_enable(buffer);
2432
2433  out:
2434         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2435 }
2436
2437 /**
2438  * tracing_stop - quick stop of the tracer
2439  *
2440  * Light weight way to stop tracing. Use in conjunction with
2441  * tracing_start.
2442  */
2443 void tracing_stop(void)
2444 {
2445         struct trace_buffer *buffer;
2446         unsigned long flags;
2447
2448         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2449         if (global_trace.stop_count++)
2450                 goto out;
2451
2452         /* Prevent the buffers from switching */
2453         arch_spin_lock(&global_trace.max_lock);
2454
2455         buffer = global_trace.array_buffer.buffer;
2456         if (buffer)
2457                 ring_buffer_record_disable(buffer);
2458
2459 #ifdef CONFIG_TRACER_MAX_TRACE
2460         buffer = global_trace.max_buffer.buffer;
2461         if (buffer)
2462                 ring_buffer_record_disable(buffer);
2463 #endif
2464
2465         arch_spin_unlock(&global_trace.max_lock);
2466
2467  out:
2468         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2469 }
2470
2471 static void tracing_stop_tr(struct trace_array *tr)
2472 {
2473         struct trace_buffer *buffer;
2474         unsigned long flags;
2475
2476         /* If global, we need to also stop the max tracer */
2477         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2478                 return tracing_stop();
2479
2480         raw_spin_lock_irqsave(&tr->start_lock, flags);
2481         if (tr->stop_count++)
2482                 goto out;
2483
2484         buffer = tr->array_buffer.buffer;
2485         if (buffer)
2486                 ring_buffer_record_disable(buffer);
2487
2488  out:
2489         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2490 }
2491
2492 static int trace_save_cmdline(struct task_struct *tsk)
2493 {
2494         unsigned tpid, idx;
2495
2496         /* treat recording of idle task as a success */
2497         if (!tsk->pid)
2498                 return 1;
2499
2500         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2501
2502         /*
2503          * It's not the end of the world if we don't get
2504          * the lock, but we also don't want to spin
2505          * nor do we want to disable interrupts,
2506          * so if we miss here, then better luck next time.
2507          *
2508          * This is called within the scheduler and wakeup paths, so interrupts
2509          * had better be disabled and the run queue lock held.
2510          */
2511         lockdep_assert_preemption_disabled();
2512         if (!arch_spin_trylock(&trace_cmdline_lock))
2513                 return 0;
2514
2515         idx = savedcmd->map_pid_to_cmdline[tpid];
2516         if (idx == NO_CMDLINE_MAP) {
2517                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2518
2519                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2520                 savedcmd->cmdline_idx = idx;
2521         }
2522
2523         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2524         set_cmdline(idx, tsk->comm);
2525
2526         arch_spin_unlock(&trace_cmdline_lock);
2527
2528         return 1;
2529 }
2530
2531 static void __trace_find_cmdline(int pid, char comm[])
2532 {
2533         unsigned map;
2534         int tpid;
2535
2536         if (!pid) {
2537                 strcpy(comm, "<idle>");
2538                 return;
2539         }
2540
2541         if (WARN_ON_ONCE(pid < 0)) {
2542                 strcpy(comm, "<XXX>");
2543                 return;
2544         }
2545
2546         tpid = pid & (PID_MAX_DEFAULT - 1);
2547         map = savedcmd->map_pid_to_cmdline[tpid];
2548         if (map != NO_CMDLINE_MAP) {
2549                 tpid = savedcmd->map_cmdline_to_pid[map];
2550                 if (tpid == pid) {
2551                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2552                         return;
2553                 }
2554         }
2555         strcpy(comm, "<...>");
2556 }
2557
2558 void trace_find_cmdline(int pid, char comm[])
2559 {
2560         preempt_disable();
2561         arch_spin_lock(&trace_cmdline_lock);
2562
2563         __trace_find_cmdline(pid, comm);
2564
2565         arch_spin_unlock(&trace_cmdline_lock);
2566         preempt_enable();
2567 }
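
/*
 * Worked example (editorial illustration): the comm cache is indexed by
 * pid & (PID_MAX_DEFAULT - 1), so pids that differ by a multiple of
 * PID_MAX_DEFAULT share a map_pid_to_cmdline slot (e.g. pids 100 and
 * 32868 when PID_MAX_DEFAULT is 32768). trace_save_cmdline() simply
 * overwrites the slot with the most recent task, and __trace_find_cmdline()
 * uses map_cmdline_to_pid to detect that the slot now belongs to a
 * different pid, falling back to "<...>" instead of reporting a stale comm.
 */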
2568
2569 static int *trace_find_tgid_ptr(int pid)
2570 {
2571         /*
2572          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2573          * if we observe a non-NULL tgid_map then we also observe the correct
2574          * tgid_map_max.
2575          */
2576         int *map = smp_load_acquire(&tgid_map);
2577
2578         if (unlikely(!map || pid > tgid_map_max))
2579                 return NULL;
2580
2581         return &map[pid];
2582 }
2583
2584 int trace_find_tgid(int pid)
2585 {
2586         int *ptr = trace_find_tgid_ptr(pid);
2587
2588         return ptr ? *ptr : 0;
2589 }
2590
2591 static int trace_save_tgid(struct task_struct *tsk)
2592 {
2593         int *ptr;
2594
2595         /* treat recording of idle task as a success */
2596         if (!tsk->pid)
2597                 return 1;
2598
2599         ptr = trace_find_tgid_ptr(tsk->pid);
2600         if (!ptr)
2601                 return 0;
2602
2603         *ptr = tsk->tgid;
2604         return 1;
2605 }
2606
2607 static bool tracing_record_taskinfo_skip(int flags)
2608 {
2609         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2610                 return true;
2611         if (!__this_cpu_read(trace_taskinfo_save))
2612                 return true;
2613         return false;
2614 }
2615
2616 /**
2617  * tracing_record_taskinfo - record the task info of a task
2618  *
2619  * @task:  task to record
2620  * @flags: TRACE_RECORD_CMDLINE for recording comm
2621  *         TRACE_RECORD_TGID for recording tgid
2622  */
2623 void tracing_record_taskinfo(struct task_struct *task, int flags)
2624 {
2625         bool done;
2626
2627         if (tracing_record_taskinfo_skip(flags))
2628                 return;
2629
2630         /*
2631          * Record as much task information as possible. If some fail, continue
2632          * to try to record the others.
2633          */
2634         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2635         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2636
2637         /* If recording any information failed, retry again soon. */
2638         if (!done)
2639                 return;
2640
2641         __this_cpu_write(trace_taskinfo_save, false);
2642 }
2643
2644 /**
2645  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2646  *
2647  * @prev: previous task during sched_switch
2648  * @next: next task during sched_switch
2649  * @flags: TRACE_RECORD_CMDLINE for recording comm
2650  *         TRACE_RECORD_TGID for recording tgid
2651  */
2652 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2653                                           struct task_struct *next, int flags)
2654 {
2655         bool done;
2656
2657         if (tracing_record_taskinfo_skip(flags))
2658                 return;
2659
2660         /*
2661          * Record as much task information as possible. If some fail, continue
2662          * to try to record the others.
2663          */
2664         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2665         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2666         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2667         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2668
2669         /* If recording any information failed, retry again soon. */
2670         if (!done)
2671                 return;
2672
2673         __this_cpu_write(trace_taskinfo_save, false);
2674 }
2675
2676 /* Helpers to record a specific task information */
2677 void tracing_record_cmdline(struct task_struct *task)
2678 {
2679         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2680 }
2681
2682 void tracing_record_tgid(struct task_struct *task)
2683 {
2684         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2685 }
2686
2687 /*
2688  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2689  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2690  * simplifies those functions and keeps them in sync.
2691  */
2692 enum print_line_t trace_handle_return(struct trace_seq *s)
2693 {
2694         return trace_seq_has_overflowed(s) ?
2695                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2696 }
2697 EXPORT_SYMBOL_GPL(trace_handle_return);
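
/*
 * Usage sketch (editorial illustration; trace_raw_output_foo is a made-up
 * name standing in for a generated output callback): event print handlers
 * finish their trace_seq writes with this helper so overflow is reported
 * uniformly:
 *
 *	static enum print_line_t
 *	trace_raw_output_foo(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "foo: %d\n", 0);
 *		return trace_handle_return(s);
 *	}
 */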
2698
2699 static unsigned short migration_disable_value(void)
2700 {
2701 #if defined(CONFIG_SMP)
2702         return current->migration_disabled;
2703 #else
2704         return 0;
2705 #endif
2706 }
2707
2708 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2709 {
2710         unsigned int trace_flags = irqs_status;
2711         unsigned int pc;
2712
2713         pc = preempt_count();
2714
2715         if (pc & NMI_MASK)
2716                 trace_flags |= TRACE_FLAG_NMI;
2717         if (pc & HARDIRQ_MASK)
2718                 trace_flags |= TRACE_FLAG_HARDIRQ;
2719         if (in_serving_softirq())
2720                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2721         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2722                 trace_flags |= TRACE_FLAG_BH_OFF;
2723
2724         if (tif_need_resched())
2725                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2726         if (test_preempt_need_resched())
2727                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2728         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2729                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2730 }
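
/*
 * Layout note (editorial illustration, derived from the code above): the
 * returned word packs the preemption depth in bits 0-3 and the
 * migration-disable depth in bits 4-7 (both clamped to 0xf), with the
 * TRACE_FLAG_* bits living at bit 16 and above. For example, with
 * preemption disabled twice, no hardirq/softirq/NMI context, no resched
 * flags and migration enabled, the result is (irqs_status << 16) | 0x2.
 */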
2731
2732 struct ring_buffer_event *
2733 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2734                           int type,
2735                           unsigned long len,
2736                           unsigned int trace_ctx)
2737 {
2738         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2739 }
2740
2741 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2742 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2743 static int trace_buffered_event_ref;
2744
2745 /**
2746  * trace_buffered_event_enable - enable buffering events
2747  *
2748  * When events are being filtered, it is quicker to use a temporary
2749  * buffer to write the event data into if there's a likely chance
2750  * that it will not be committed. Discarding an event from the ring
2751  * buffer is not as fast as committing one, and is much slower than
2752  * copying the data and committing it in one go.
2753  *
2754  * When events are to be filtered, allocate per-CPU buffers to write
2755  * the event data into; if an event is filtered and discarded, the data
2756  * is simply dropped, otherwise the entire event is committed in one
2757  * shot.
2758  */
2759 void trace_buffered_event_enable(void)
2760 {
2761         struct ring_buffer_event *event;
2762         struct page *page;
2763         int cpu;
2764
2765         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2766
2767         if (trace_buffered_event_ref++)
2768                 return;
2769
2770         for_each_tracing_cpu(cpu) {
2771                 page = alloc_pages_node(cpu_to_node(cpu),
2772                                         GFP_KERNEL | __GFP_NORETRY, 0);
2773                 if (!page)
2774                         goto failed;
2775
2776                 event = page_address(page);
2777                 memset(event, 0, sizeof(*event));
2778
2779                 per_cpu(trace_buffered_event, cpu) = event;
2780
2781                 preempt_disable();
2782                 if (cpu == smp_processor_id() &&
2783                     __this_cpu_read(trace_buffered_event) !=
2784                     per_cpu(trace_buffered_event, cpu))
2785                         WARN_ON_ONCE(1);
2786                 preempt_enable();
2787         }
2788
2789         return;
2790  failed:
2791         trace_buffered_event_disable();
2792 }
2793
2794 static void enable_trace_buffered_event(void *data)
2795 {
2796         /* Probably not needed, but do it anyway */
2797         smp_rmb();
2798         this_cpu_dec(trace_buffered_event_cnt);
2799 }
2800
2801 static void disable_trace_buffered_event(void *data)
2802 {
2803         this_cpu_inc(trace_buffered_event_cnt);
2804 }
2805
2806 /**
2807  * trace_buffered_event_disable - disable buffering events
2808  *
2809  * When a filter is removed, it is faster to not use the buffered
2810  * events, and to commit directly into the ring buffer. Free up
2811  * the temp buffers when there are no more users. This requires
2812  * special synchronization with current events.
2813  */
2814 void trace_buffered_event_disable(void)
2815 {
2816         int cpu;
2817
2818         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2819
2820         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2821                 return;
2822
2823         if (--trace_buffered_event_ref)
2824                 return;
2825
2826         preempt_disable();
2827         /* For each CPU, set the buffer as used. */
2828         smp_call_function_many(tracing_buffer_mask,
2829                                disable_trace_buffered_event, NULL, 1);
2830         preempt_enable();
2831
2832         /* Wait for all current users to finish */
2833         synchronize_rcu();
2834
2835         for_each_tracing_cpu(cpu) {
2836                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2837                 per_cpu(trace_buffered_event, cpu) = NULL;
2838         }
2839         /*
2840          * Make sure trace_buffered_event is NULL before clearing
2841          * trace_buffered_event_cnt.
2842          */
2843         smp_wmb();
2844
2845         preempt_disable();
2846         /* Do the work on each cpu */
2847         smp_call_function_many(tracing_buffer_mask,
2848                                enable_trace_buffered_event, NULL, 1);
2849         preempt_enable();
2850 }
2851
2852 static struct trace_buffer *temp_buffer;
2853
2854 struct ring_buffer_event *
2855 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2856                           struct trace_event_file *trace_file,
2857                           int type, unsigned long len,
2858                           unsigned int trace_ctx)
2859 {
2860         struct ring_buffer_event *entry;
2861         struct trace_array *tr = trace_file->tr;
2862         int val;
2863
2864         *current_rb = tr->array_buffer.buffer;
2865
2866         if (!tr->no_filter_buffering_ref &&
2867             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2868                 preempt_disable_notrace();
2869                 /*
2870                  * Filtering is on, so try to use the per cpu buffer first.
2871                  * This buffer will simulate a ring_buffer_event,
2872                  * where the type_len is zero and the array[0] will
2873                  * hold the full length.
2874                  * (see include/linux/ring_buffer.h for details on
2875                  *  how the ring_buffer_event is structured).
2876                  *
2877                  * Using a temp buffer during filtering and copying it
2878                  * on a matched filter is quicker than writing directly
2879                  * into the ring buffer and then discarding it when
2880                  * it doesn't match. That is because the discard
2881                  * requires several atomic operations to get right.
2882                  * Copying on match and doing nothing on a failed match
2883                  * is still quicker than not copying on match but then having
2884                  * to discard out of the ring buffer on a failed match.
2885                  */
2886                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2887                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2888
2889                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2890
2891                         /*
2892                          * Preemption is disabled, but interrupts and NMIs
2893                          * can still come in now. If that happens after
2894                          * the above increment, then it will have to go
2895                          * back to the old method of allocating the event
2896                          * on the ring buffer, and if the filter fails, it
2897                          * will have to call ring_buffer_discard_commit()
2898                          * to remove it.
2899                          *
2900                          * Need to also check the unlikely case that the
2901                          * length is bigger than the temp buffer size.
2902                          * If that happens, then the reserve is pretty much
2903                          * guaranteed to fail, as the ring buffer currently
2904                          * only allows events less than a page. But that may
2905                          * change in the future, so let the ring buffer reserve
2906                          * handle the failure in that case.
2907                          */
2908                         if (val == 1 && likely(len <= max_len)) {
2909                                 trace_event_setup(entry, type, trace_ctx);
2910                                 entry->array[0] = len;
2911                                 /* Return with preemption disabled */
2912                                 return entry;
2913                         }
2914                         this_cpu_dec(trace_buffered_event_cnt);
2915                 }
2916                 /* __trace_buffer_lock_reserve() disables preemption */
2917                 preempt_enable_notrace();
2918         }
2919
2920         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2921                                             trace_ctx);
2922         /*
2923          * If tracing is off, but we have triggers enabled
2924          * we still need to look at the event data. Use the temp_buffer
2925          * to store the trace event for the trigger to use. It is recursion
2926          * safe and will not be recorded anywhere.
2927          */
2928         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2929                 *current_rb = temp_buffer;
2930                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2931                                                     trace_ctx);
2932         }
2933         return entry;
2934 }
2935 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2936
2937 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2938 static DEFINE_MUTEX(tracepoint_printk_mutex);
2939
2940 static void output_printk(struct trace_event_buffer *fbuffer)
2941 {
2942         struct trace_event_call *event_call;
2943         struct trace_event_file *file;
2944         struct trace_event *event;
2945         unsigned long flags;
2946         struct trace_iterator *iter = tracepoint_print_iter;
2947
2948         /* We should never get here if iter is NULL */
2949         if (WARN_ON_ONCE(!iter))
2950                 return;
2951
2952         event_call = fbuffer->trace_file->event_call;
2953         if (!event_call || !event_call->event.funcs ||
2954             !event_call->event.funcs->trace)
2955                 return;
2956
2957         file = fbuffer->trace_file;
2958         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2959             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2960              !filter_match_preds(file->filter, fbuffer->entry)))
2961                 return;
2962
2963         event = &fbuffer->trace_file->event_call->event;
2964
2965         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2966         trace_seq_init(&iter->seq);
2967         iter->ent = fbuffer->entry;
2968         event_call->event.funcs->trace(iter, 0, event);
2969         trace_seq_putc(&iter->seq, 0);
2970         printk("%s", iter->seq.buffer);
2971
2972         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2973 }
2974
2975 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2976                              void *buffer, size_t *lenp,
2977                              loff_t *ppos)
2978 {
2979         int save_tracepoint_printk;
2980         int ret;
2981
2982         mutex_lock(&tracepoint_printk_mutex);
2983         save_tracepoint_printk = tracepoint_printk;
2984
2985         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2986
2987         /*
2988          * This will force exiting early, as tracepoint_printk
2989          * is always zero when tracepoint_print_iter is not allocated.
2990          */
2991         if (!tracepoint_print_iter)
2992                 tracepoint_printk = 0;
2993
2994         if (save_tracepoint_printk == tracepoint_printk)
2995                 goto out;
2996
2997         if (tracepoint_printk)
2998                 static_key_enable(&tracepoint_printk_key.key);
2999         else
3000                 static_key_disable(&tracepoint_printk_key.key);
3001
3002  out:
3003         mutex_unlock(&tracepoint_printk_mutex);
3004
3005         return ret;
3006 }
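
/*
 * Usage note (editorial illustration): this handler backs the
 * kernel.tracepoint_printk sysctl. It only has an effect when the
 * tp_printk boot parameter set up tracepoint_print_iter; a typical
 * sequence is booting with "tp_printk" and then toggling at run time:
 *
 *	sysctl kernel.tracepoint_printk=0
 *	sysctl kernel.tracepoint_printk=1
 */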
3007
3008 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3009 {
3010         enum event_trigger_type tt = ETT_NONE;
3011         struct trace_event_file *file = fbuffer->trace_file;
3012
3013         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3014                         fbuffer->entry, &tt))
3015                 goto discard;
3016
3017         if (static_key_false(&tracepoint_printk_key.key))
3018                 output_printk(fbuffer);
3019
3020         if (static_branch_unlikely(&trace_event_exports_enabled))
3021                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3022
3023         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3024                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3025
3026 discard:
3027         if (tt)
3028                 event_triggers_post_call(file, tt);
3029
3030 }
3031 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3032
3033 /*
3034  * Skip 3:
3035  *
3036  *   trace_buffer_unlock_commit_regs()
3037  *   trace_event_buffer_commit()
3038  *   trace_event_raw_event_xxx()
3039  */
3040 # define STACK_SKIP 3
3041
3042 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3043                                      struct trace_buffer *buffer,
3044                                      struct ring_buffer_event *event,
3045                                      unsigned int trace_ctx,
3046                                      struct pt_regs *regs)
3047 {
3048         __buffer_unlock_commit(buffer, event);
3049
3050         /*
3051          * If regs is not set, then skip the necessary functions.
3052          * Note, we can still get here via blktrace, wakeup tracer
3053          * and mmiotrace, but that's ok if they lose a function or
3054          * two. They are not that meaningful.
3055          */
3056         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3057         ftrace_trace_userstack(tr, buffer, trace_ctx);
3058 }
3059
3060 /*
3061  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3062  */
3063 void
3064 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3065                                    struct ring_buffer_event *event)
3066 {
3067         __buffer_unlock_commit(buffer, event);
3068 }
3069
3070 void
3071 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3072                parent_ip, unsigned int trace_ctx)
3073 {
3074         struct trace_event_call *call = &event_function;
3075         struct trace_buffer *buffer = tr->array_buffer.buffer;
3076         struct ring_buffer_event *event;
3077         struct ftrace_entry *entry;
3078
3079         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3080                                             trace_ctx);
3081         if (!event)
3082                 return;
3083         entry   = ring_buffer_event_data(event);
3084         entry->ip                       = ip;
3085         entry->parent_ip                = parent_ip;
3086
3087         if (!call_filter_check_discard(call, entry, buffer, event)) {
3088                 if (static_branch_unlikely(&trace_function_exports_enabled))
3089                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3090                 __buffer_unlock_commit(buffer, event);
3091         }
3092 }
3093
3094 #ifdef CONFIG_STACKTRACE
3095
3096 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3097 #define FTRACE_KSTACK_NESTING   4
3098
3099 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3100
3101 struct ftrace_stack {
3102         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3103 };
3104
3105
3106 struct ftrace_stacks {
3107         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3108 };
3109
3110 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3111 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3112
3113 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3114                                  unsigned int trace_ctx,
3115                                  int skip, struct pt_regs *regs)
3116 {
3117         struct trace_event_call *call = &event_kernel_stack;
3118         struct ring_buffer_event *event;
3119         unsigned int size, nr_entries;
3120         struct ftrace_stack *fstack;
3121         struct stack_entry *entry;
3122         int stackidx;
3123
3124         /*
3125          * Add one, for this function and the call to stack_trace_save().
3126          * If regs is set, then these functions will not be in the way.
3127          */
3128 #ifndef CONFIG_UNWINDER_ORC
3129         if (!regs)
3130                 skip++;
3131 #endif
3132
3133         preempt_disable_notrace();
3134
3135         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3136
3137         /* This should never happen. If it does, yell once and skip */
3138         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3139                 goto out;
3140
3141         /*
3142          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3143          * interrupt will either see the value pre increment or post
3144          * increment. If the interrupt happens pre increment it will have
3145          * restored the counter when it returns.  We just need a barrier to
3146          * keep gcc from moving things around.
3147          */
3148         barrier();
3149
3150         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3151         size = ARRAY_SIZE(fstack->calls);
3152
3153         if (regs) {
3154                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3155                                                    size, skip);
3156         } else {
3157                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3158         }
3159
3160         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3161                                     struct_size(entry, caller, nr_entries),
3162                                     trace_ctx);
3163         if (!event)
3164                 goto out;
3165         entry = ring_buffer_event_data(event);
3166
3167         entry->size = nr_entries;
3168         memcpy(&entry->caller, fstack->calls,
3169                flex_array_size(entry, caller, nr_entries));
3170
3171         if (!call_filter_check_discard(call, entry, buffer, event))
3172                 __buffer_unlock_commit(buffer, event);
3173
3174  out:
3175         /* Again, don't let gcc optimize things here */
3176         barrier();
3177         __this_cpu_dec(ftrace_stack_reserve);
3178         preempt_enable_notrace();
3179
3180 }
3181
3182 static inline void ftrace_trace_stack(struct trace_array *tr,
3183                                       struct trace_buffer *buffer,
3184                                       unsigned int trace_ctx,
3185                                       int skip, struct pt_regs *regs)
3186 {
3187         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3188                 return;
3189
3190         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3191 }
3192
3193 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3194                    int skip)
3195 {
3196         struct trace_buffer *buffer = tr->array_buffer.buffer;
3197
3198         if (rcu_is_watching()) {
3199                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3200                 return;
3201         }
3202
3203         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3204                 return;
3205
3206         /*
3207          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3208          * but if the above rcu_is_watching() failed, then the NMI
3209          * triggered someplace critical, and ct_irq_enter() should
3210          * not be called from NMI.
3211          */
3212         if (unlikely(in_nmi()))
3213                 return;
3214
3215         ct_irq_enter_irqson();
3216         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3217         ct_irq_exit_irqson();
3218 }
3219
3220 /**
3221  * trace_dump_stack - record a stack back trace in the trace buffer
3222  * @skip: Number of functions to skip (helper handlers)
3223  */
3224 void trace_dump_stack(int skip)
3225 {
3226         if (tracing_disabled || tracing_selftest_running)
3227                 return;
3228
3229 #ifndef CONFIG_UNWINDER_ORC
3230         /* Skip 1 to skip this function. */
3231         skip++;
3232 #endif
3233         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3234                              tracing_gen_ctx(), skip, NULL);
3235 }
3236 EXPORT_SYMBOL_GPL(trace_dump_stack);
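/*
 * Editor's sketch, not part of trace.c: one way a driver might combine
 * trace_printk() with trace_dump_stack() while chasing a bug, assuming
 * the declarations that <linux/kernel.h> provides when tracing is
 * enabled.  The function below and its argument are hypothetical.
 */
#include <linux/kernel.h>

static void mydrv_debug_unexpected_state(int state)
{
        /* Record a message plus the call chain that led here */
        trace_printk("unexpected state %d\n", state);
        trace_dump_stack(0);    /* 0: don't skip any additional frames */
}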
3237
3238 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3239 static DEFINE_PER_CPU(int, user_stack_count);
3240
3241 static void
3242 ftrace_trace_userstack(struct trace_array *tr,
3243                        struct trace_buffer *buffer, unsigned int trace_ctx)
3244 {
3245         struct trace_event_call *call = &event_user_stack;
3246         struct ring_buffer_event *event;
3247         struct userstack_entry *entry;
3248
3249         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3250                 return;
3251
3252         /*
3253          * NMIs cannot handle page faults, even with fixups.
3254          * Saving the user stack can (and often does) fault.
3255          */
3256         if (unlikely(in_nmi()))
3257                 return;
3258
3259         /*
3260          * prevent recursion, since the user stack tracing may
3261          * trigger other kernel events.
3262          */
3263         preempt_disable();
3264         if (__this_cpu_read(user_stack_count))
3265                 goto out;
3266
3267         __this_cpu_inc(user_stack_count);
3268
3269         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3270                                             sizeof(*entry), trace_ctx);
3271         if (!event)
3272                 goto out_drop_count;
3273         entry   = ring_buffer_event_data(event);
3274
3275         entry->tgid             = current->tgid;
3276         memset(&entry->caller, 0, sizeof(entry->caller));
3277
3278         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3279         if (!call_filter_check_discard(call, entry, buffer, event))
3280                 __buffer_unlock_commit(buffer, event);
3281
3282  out_drop_count:
3283         __this_cpu_dec(user_stack_count);
3284  out:
3285         preempt_enable();
3286 }
3287 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3288 static void ftrace_trace_userstack(struct trace_array *tr,
3289                                    struct trace_buffer *buffer,
3290                                    unsigned int trace_ctx)
3291 {
3292 }
3293 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3294
3295 #endif /* CONFIG_STACKTRACE */
3296
3297 static inline void
3298 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3299                           unsigned long long delta)
3300 {
3301         entry->bottom_delta_ts = delta & U32_MAX;
3302         entry->top_delta_ts = (delta >> 32);
3303 }
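/*
 * Editor's sketch, not part of trace.c: the inverse of the split above.
 * The output side reconstructs the 64-bit delta from the two halves;
 * the helper name used here is illustrative only.
 */
static inline u64 example_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
        return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}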
3304
3305 void trace_last_func_repeats(struct trace_array *tr,
3306                              struct trace_func_repeats *last_info,
3307                              unsigned int trace_ctx)
3308 {
3309         struct trace_buffer *buffer = tr->array_buffer.buffer;
3310         struct func_repeats_entry *entry;
3311         struct ring_buffer_event *event;
3312         u64 delta;
3313
3314         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3315                                             sizeof(*entry), trace_ctx);
3316         if (!event)
3317                 return;
3318
3319         delta = ring_buffer_event_time_stamp(buffer, event) -
3320                 last_info->ts_last_call;
3321
3322         entry = ring_buffer_event_data(event);
3323         entry->ip = last_info->ip;
3324         entry->parent_ip = last_info->parent_ip;
3325         entry->count = last_info->count;
3326         func_repeats_set_delta_ts(entry, delta);
3327
3328         __buffer_unlock_commit(buffer, event);
3329 }
3330
3331 /* created for use with alloc_percpu */
3332 struct trace_buffer_struct {
3333         int nesting;
3334         char buffer[4][TRACE_BUF_SIZE];
3335 };
3336
3337 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3338
3339 /*
3340  * This allows for lockless recording.  If we're nested too deeply, then
3341  * this returns NULL.
3342  */
3343 static char *get_trace_buf(void)
3344 {
3345         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3346
3347         if (!trace_percpu_buffer || buffer->nesting >= 4)
3348                 return NULL;
3349
3350         buffer->nesting++;
3351
3352         /* Interrupts must see nesting incremented before we use the buffer */
3353         barrier();
3354         return &buffer->buffer[buffer->nesting - 1][0];
3355 }
3356
3357 static void put_trace_buf(void)
3358 {
3359         /* Don't let the decrement of nesting leak before this */
3360         barrier();
3361         this_cpu_dec(trace_percpu_buffer->nesting);
3362 }
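/*
 * Editor's sketch, not part of trace.c: the required shape of a
 * get_trace_buf()/put_trace_buf() user.  trace_vbprintk() below is the
 * real caller; this hypothetical helper only shows the pairing and the
 * preemption rule (the buffer is per CPU, so preemption must stay off
 * while it is in use).
 */
static void example_scratch_user(const char *msg)
{
        char *buf;

        preempt_disable_notrace();
        buf = get_trace_buf();
        if (buf) {
                strscpy(buf, msg, TRACE_BUF_SIZE);
                /* ... hand 'buf' to the ring buffer here ... */
                put_trace_buf();
        }
        preempt_enable_notrace();
}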
3363
3364 static int alloc_percpu_trace_buffer(void)
3365 {
3366         struct trace_buffer_struct __percpu *buffers;
3367
3368         if (trace_percpu_buffer)
3369                 return 0;
3370
3371         buffers = alloc_percpu(struct trace_buffer_struct);
3372         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3373                 return -ENOMEM;
3374
3375         trace_percpu_buffer = buffers;
3376         return 0;
3377 }
3378
3379 static int buffers_allocated;
3380
3381 void trace_printk_init_buffers(void)
3382 {
3383         if (buffers_allocated)
3384                 return;
3385
3386         if (alloc_percpu_trace_buffer())
3387                 return;
3388
3389         /* trace_printk() is for debug use only. Don't use it in production. */
3390
3391         pr_warn("\n");
3392         pr_warn("**********************************************************\n");
3393         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3394         pr_warn("**                                                      **\n");
3395         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3396         pr_warn("**                                                      **\n");
3397         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3398         pr_warn("** unsafe for production use.                           **\n");
3399         pr_warn("**                                                      **\n");
3400         pr_warn("** If you see this message and you are not debugging    **\n");
3401         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3402         pr_warn("**                                                      **\n");
3403         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3404         pr_warn("**********************************************************\n");
3405
3406         /* Expand the buffers to set size */
3407         tracing_update_buffers(&global_trace);
3408
3409         buffers_allocated = 1;
3410
3411         /*
3412          * trace_printk_init_buffers() can be called by modules.
3413          * If that happens, then we need to start cmdline recording
3414          * directly here. If the global_trace.array_buffer.buffer is
3415          * already allocated, then this was called by module code.
3416          */
3417         if (global_trace.array_buffer.buffer)
3418                 tracing_start_cmdline_record();
3419 }
3420 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3421
3422 void trace_printk_start_comm(void)
3423 {
3424         /* Start tracing comms if trace printk is set */
3425         if (!buffers_allocated)
3426                 return;
3427         tracing_start_cmdline_record();
3428 }
3429
3430 static void trace_printk_start_stop_comm(int enabled)
3431 {
3432         if (!buffers_allocated)
3433                 return;
3434
3435         if (enabled)
3436                 tracing_start_cmdline_record();
3437         else
3438                 tracing_stop_cmdline_record();
3439 }
3440
3441 /**
3442  * trace_vbprintk - write binary msg to tracing buffer
3443  * @ip:    The address of the caller
3444  * @fmt:   The string format to write to the buffer
3445  * @args:  Arguments for @fmt
3446  */
3447 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3448 {
3449         struct trace_event_call *call = &event_bprint;
3450         struct ring_buffer_event *event;
3451         struct trace_buffer *buffer;
3452         struct trace_array *tr = &global_trace;
3453         struct bprint_entry *entry;
3454         unsigned int trace_ctx;
3455         char *tbuffer;
3456         int len = 0, size;
3457
3458         if (unlikely(tracing_selftest_running || tracing_disabled))
3459                 return 0;
3460
3461         /* Don't pollute graph traces with trace_vprintk internals */
3462         pause_graph_tracing();
3463
3464         trace_ctx = tracing_gen_ctx();
3465         preempt_disable_notrace();
3466
3467         tbuffer = get_trace_buf();
3468         if (!tbuffer) {
3469                 len = 0;
3470                 goto out_nobuffer;
3471         }
3472
3473         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3474
3475         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3476                 goto out_put;
3477
3478         size = sizeof(*entry) + sizeof(u32) * len;
3479         buffer = tr->array_buffer.buffer;
3480         ring_buffer_nest_start(buffer);
3481         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3482                                             trace_ctx);
3483         if (!event)
3484                 goto out;
3485         entry = ring_buffer_event_data(event);
3486         entry->ip                       = ip;
3487         entry->fmt                      = fmt;
3488
3489         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3490         if (!call_filter_check_discard(call, entry, buffer, event)) {
3491                 __buffer_unlock_commit(buffer, event);
3492                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3493         }
3494
3495 out:
3496         ring_buffer_nest_end(buffer);
3497 out_put:
3498         put_trace_buf();
3499
3500 out_nobuffer:
3501         preempt_enable_notrace();
3502         unpause_graph_tracing();
3503
3504         return len;
3505 }
3506 EXPORT_SYMBOL_GPL(trace_vbprintk);
3507
3508 __printf(3, 0)
3509 static int
3510 __trace_array_vprintk(struct trace_buffer *buffer,
3511                       unsigned long ip, const char *fmt, va_list args)
3512 {
3513         struct trace_event_call *call = &event_print;
3514         struct ring_buffer_event *event;
3515         int len = 0, size;
3516         struct print_entry *entry;
3517         unsigned int trace_ctx;
3518         char *tbuffer;
3519
3520         if (tracing_disabled)
3521                 return 0;
3522
3523         /* Don't pollute graph traces with trace_vprintk internals */
3524         pause_graph_tracing();
3525
3526         trace_ctx = tracing_gen_ctx();
3527         preempt_disable_notrace();
3528
3529
3530         tbuffer = get_trace_buf();
3531         if (!tbuffer) {
3532                 len = 0;
3533                 goto out_nobuffer;
3534         }
3535
3536         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3537
3538         size = sizeof(*entry) + len + 1;
3539         ring_buffer_nest_start(buffer);
3540         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3541                                             trace_ctx);
3542         if (!event)
3543                 goto out;
3544         entry = ring_buffer_event_data(event);
3545         entry->ip = ip;
3546
3547         memcpy(&entry->buf, tbuffer, len + 1);
3548         if (!call_filter_check_discard(call, entry, buffer, event)) {
3549                 __buffer_unlock_commit(buffer, event);
3550                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3551         }
3552
3553 out:
3554         ring_buffer_nest_end(buffer);
3555         put_trace_buf();
3556
3557 out_nobuffer:
3558         preempt_enable_notrace();
3559         unpause_graph_tracing();
3560
3561         return len;
3562 }
3563
3564 __printf(3, 0)
3565 int trace_array_vprintk(struct trace_array *tr,
3566                         unsigned long ip, const char *fmt, va_list args)
3567 {
3568         if (tracing_selftest_running && tr == &global_trace)
3569                 return 0;
3570
3571         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3572 }
3573
3574 /**
3575  * trace_array_printk - Print a message to a specific instance
3576  * @tr: The instance trace_array descriptor
3577  * @ip: The instruction pointer that this is called from.
3578  * @fmt: The format to print (printf format)
3579  *
3580  * If a subsystem sets up its own instance, they have the right to
3581  * printk strings into their tracing instance buffer using this
3582  * function. Note, this function will not write into the top level
3583  * buffer (use trace_printk() for that), as writing into the top level
3584  * buffer should only have events that can be individually disabled.
3585  * trace_printk() is only used for debugging a kernel, and should
3586  * never be incorporated into normal use.
3587  *
3588  * trace_array_printk() can be used, as it will not add noise to the
3589  * top level tracing buffer.
3590  *
3591  * Note, trace_array_init_printk() must be called on @tr before this
3592  * can be used.
3593  */
3594 __printf(3, 0)
3595 int trace_array_printk(struct trace_array *tr,
3596                        unsigned long ip, const char *fmt, ...)
3597 {
3598         int ret;
3599         va_list ap;
3600
3601         if (!tr)
3602                 return -ENOENT;
3603
3604         /* This is only allowed for created instances */
3605         if (tr == &global_trace)
3606                 return 0;
3607
3608         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3609                 return 0;
3610
3611         va_start(ap, fmt);
3612         ret = trace_array_vprintk(tr, ip, fmt, ap);
3613         va_end(ap);
3614         return ret;
3615 }
3616 EXPORT_SYMBOL_GPL(trace_array_printk);
3617
3618 /**
3619  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3620  * @tr: The trace array to initialize the buffers for
3621  *
3622  * As trace_array_printk() only writes into instances, calls to it are OK
3623  * to have in the kernel (unlike trace_printk()). This needs to be called
3624  * before trace_array_printk() can be used on a trace_array.
3625  */
3626 int trace_array_init_printk(struct trace_array *tr)
3627 {
3628         if (!tr)
3629                 return -ENOENT;
3630
3631         /* This is only allowed for created instances */
3632         if (tr == &global_trace)
3633                 return -EINVAL;
3634
3635         return alloc_percpu_trace_buffer();
3636 }
3637 EXPORT_SYMBOL_GPL(trace_array_init_printk);
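/*
 * Editor's sketch, not part of trace.c: how a subsystem might use the
 * instance printk API described above.  Assumes the
 * trace_array_get_by_name()/trace_array_put() interface exported from
 * this file and declared in <linux/trace.h> around this kernel version;
 * the "mydrv" instance name and the helpers below are hypothetical.
 */
#include <linux/kernel.h>
#include <linux/trace.h>

static struct trace_array *mydrv_tr;

static int mydrv_trace_setup(void)
{
        mydrv_tr = trace_array_get_by_name("mydrv");
        if (!mydrv_tr)
                return -ENOMEM;

        return trace_array_init_printk(mydrv_tr);
}

static void mydrv_trace_error(int err)
{
        trace_array_printk(mydrv_tr, _THIS_IP_, "request failed: %d\n", err);
}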
3638
3639 __printf(3, 4)
3640 int trace_array_printk_buf(struct trace_buffer *buffer,
3641                            unsigned long ip, const char *fmt, ...)
3642 {
3643         int ret;
3644         va_list ap;
3645
3646         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3647                 return 0;
3648
3649         va_start(ap, fmt);
3650         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3651         va_end(ap);
3652         return ret;
3653 }
3654
3655 __printf(2, 0)
3656 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3657 {
3658         return trace_array_vprintk(&global_trace, ip, fmt, args);
3659 }
3660 EXPORT_SYMBOL_GPL(trace_vprintk);
3661
3662 static void trace_iterator_increment(struct trace_iterator *iter)
3663 {
3664         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3665
3666         iter->idx++;
3667         if (buf_iter)
3668                 ring_buffer_iter_advance(buf_iter);
3669 }
3670
3671 static struct trace_entry *
3672 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3673                 unsigned long *lost_events)
3674 {
3675         struct ring_buffer_event *event;
3676         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3677
3678         if (buf_iter) {
3679                 event = ring_buffer_iter_peek(buf_iter, ts);
3680                 if (lost_events)
3681                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3682                                 (unsigned long)-1 : 0;
3683         } else {
3684                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3685                                          lost_events);
3686         }
3687
3688         if (event) {
3689                 iter->ent_size = ring_buffer_event_length(event);
3690                 return ring_buffer_event_data(event);
3691         }
3692         iter->ent_size = 0;
3693         return NULL;
3694 }
3695
3696 static struct trace_entry *
3697 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3698                   unsigned long *missing_events, u64 *ent_ts)
3699 {
3700         struct trace_buffer *buffer = iter->array_buffer->buffer;
3701         struct trace_entry *ent, *next = NULL;
3702         unsigned long lost_events = 0, next_lost = 0;
3703         int cpu_file = iter->cpu_file;
3704         u64 next_ts = 0, ts;
3705         int next_cpu = -1;
3706         int next_size = 0;
3707         int cpu;
3708
3709         /*
3710          * If we are in a per_cpu trace file, don't bother iterating over
3711          * all CPUs; just peek directly at that CPU.
3712          */
3713         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3714                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3715                         return NULL;
3716                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3717                 if (ent_cpu)
3718                         *ent_cpu = cpu_file;
3719
3720                 return ent;
3721         }
3722
3723         for_each_tracing_cpu(cpu) {
3724
3725                 if (ring_buffer_empty_cpu(buffer, cpu))
3726                         continue;
3727
3728                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3729
3730                 /*
3731                  * Pick the entry with the smallest timestamp:
3732                  */
3733                 if (ent && (!next || ts < next_ts)) {
3734                         next = ent;
3735                         next_cpu = cpu;
3736                         next_ts = ts;
3737                         next_lost = lost_events;
3738                         next_size = iter->ent_size;
3739                 }
3740         }
3741
3742         iter->ent_size = next_size;
3743
3744         if (ent_cpu)
3745                 *ent_cpu = next_cpu;
3746
3747         if (ent_ts)
3748                 *ent_ts = next_ts;
3749
3750         if (missing_events)
3751                 *missing_events = next_lost;
3752
3753         return next;
3754 }
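/*
 * Editor's note, not part of trace.c: the loop above is a k-way merge by
 * timestamp.  For example, if the next entries on three CPUs carry
 * ts = 105, 98 and 112, the entry with ts = 98 is returned first; the
 * others were only peeked, not consumed, so they are seen again on the
 * next call.
 */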
3755
3756 #define STATIC_FMT_BUF_SIZE     128
3757 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3758
3759 char *trace_iter_expand_format(struct trace_iterator *iter)
3760 {
3761         char *tmp;
3762
3763         /*
3764          * iter->tr is NULL when used with tp_printk, which means
3765          * this can get called where it is not safe to call krealloc().
3766          */
3767         if (!iter->tr || iter->fmt == static_fmt_buf)
3768                 return NULL;
3769
3770         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3771                        GFP_KERNEL);
3772         if (tmp) {
3773                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3774                 iter->fmt = tmp;
3775         }
3776
3777         return tmp;
3778 }
3779
3780 /* Returns true if the string is safe to dereference from an event */
3781 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3782                            bool star, int len)
3783 {
3784         unsigned long addr = (unsigned long)str;
3785         struct trace_event *trace_event;
3786         struct trace_event_call *event;
3787
3788         /* Ignore strings with no length */
3789         if (star && !len)
3790                 return true;
3791
3792         /* OK if part of the event data */
3793         if ((addr >= (unsigned long)iter->ent) &&
3794             (addr < (unsigned long)iter->ent + iter->ent_size))
3795                 return true;
3796
3797         /* OK if part of the temp seq buffer */
3798         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3799             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3800                 return true;
3801
3802         /* Core rodata can not be freed */
3803         if (is_kernel_rodata(addr))
3804                 return true;
3805
3806         if (trace_is_tracepoint_string(str))
3807                 return true;
3808
3809         /*
3810          * Now this could be a module event, referencing core module
3811          * data, which is OK.
3812          */
3813         if (!iter->ent)
3814                 return false;
3815
3816         trace_event = ftrace_find_event(iter->ent->type);
3817         if (!trace_event)
3818                 return false;
3819
3820         event = container_of(trace_event, struct trace_event_call, event);
3821         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3822                 return false;
3823
3824         /* Would rather have rodata, but this will suffice */
3825         if (within_module_core(addr, event->module))
3826                 return true;
3827
3828         return false;
3829 }
3830
3831 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3832
3833 static int test_can_verify_check(const char *fmt, ...)
3834 {
3835         char buf[16];
3836         va_list ap;
3837         int ret;
3838
3839         /*
3840          * The verifier depends on vsnprintf() modifying the va_list that is
3841          * passed to it, i.e. on the va_list being passed by reference. Some
3842          * architectures (like x86_32) pass it by value, which means that
3843          * vsnprintf() does not modify the caller's va_list, and the verifier
3844          * would then need to understand every conversion that vsnprintf()
3845          * can perform. If the va_list is passed by value, the verifier
3846          * is disabled.
3847          */
3848         va_start(ap, fmt);
3849         vsnprintf(buf, 16, "%d", ap);
3850         ret = va_arg(ap, int);
3851         va_end(ap);
3852
3853         return ret;
3854 }
3855
3856 static void test_can_verify(void)
3857 {
3858         if (!test_can_verify_check("%d %d", 0, 1)) {
3859                 pr_info("trace event string verifier disabled\n");
3860                 static_branch_inc(&trace_no_verify);
3861         }
3862 }
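/*
 * Editor's note, not part of trace.c: the probe above passes arguments
 * 0 and 1 but gives vsnprintf() only "%d".  When the va_list is shared
 * by reference, vsnprintf() consumes the 0, va_arg() then returns 1 and
 * the verifier stays enabled.  When the architecture passes the va_list
 * by value, va_arg() still sees the 0 and the verifier is disabled.
 */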
3863
3864 /**
3865  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3866  * @iter: The iterator that holds the seq buffer and the event being printed
3867  * @fmt: The format used to print the event
3868  * @ap: The va_list holding the data to print from @fmt.
3869  *
3870  * This writes the data into the @iter->seq buffer using the data from
3871  * @fmt and @ap. If the format has a %s, then the source of the string
3872  * is examined to make sure it is safe to print, otherwise it will
3873  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3874  * pointer.
3875  */
3876 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3877                          va_list ap)
3878 {
3879         const char *p = fmt;
3880         const char *str;
3881         int i, j;
3882
3883         if (WARN_ON_ONCE(!fmt))
3884                 return;
3885
3886         if (static_branch_unlikely(&trace_no_verify))
3887                 goto print;
3888
3889         /* Don't bother checking when doing a ftrace_dump() */
3890         if (iter->fmt == static_fmt_buf)
3891                 goto print;
3892
3893         while (*p) {
3894                 bool star = false;
3895                 int len = 0;
3896
3897                 j = 0;
3898
3899                 /* We only care about %s and variants */
3900                 for (i = 0; p[i]; i++) {
3901                         if (i + 1 >= iter->fmt_size) {
3902                                 /*
3903                                  * If we can't expand the copy buffer,
3904                                  * just print it.
3905                                  */
3906                                 if (!trace_iter_expand_format(iter))
3907                                         goto print;
3908                         }
3909
3910                         if (p[i] == '\\' && p[i+1]) {
3911                                 i++;
3912                                 continue;
3913                         }
3914                         if (p[i] == '%') {
3915                                 /* Need to test cases like %08.*s */
3916                                 for (j = 1; p[i+j]; j++) {
3917                                         if (isdigit(p[i+j]) ||
3918                                             p[i+j] == '.')
3919                                                 continue;
3920                                         if (p[i+j] == '*') {
3921                                                 star = true;
3922                                                 continue;
3923                                         }
3924                                         break;
3925                                 }
3926                                 if (p[i+j] == 's')
3927                                         break;
3928                                 star = false;
3929                         }
3930                         j = 0;
3931                 }
3932                 /* If no %s found then just print normally */
3933                 if (!p[i])
3934                         break;
3935
3936                 /* Copy up to the %s, and print that */
3937                 strncpy(iter->fmt, p, i);
3938                 iter->fmt[i] = '\0';
3939                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3940
3941                 /*
3942                  * If iter->seq is full, the above call no longer guarantees
3943                  * that ap is in sync with fmt processing, and further calls
3944                  * to va_arg() can return wrong positional arguments.
3945                  *
3946                  * Ensure that ap is no longer used in this case.
3947                  */
3948                 if (iter->seq.full) {
3949                         p = "";
3950                         break;
3951                 }
3952
3953                 if (star)
3954                         len = va_arg(ap, int);
3955
3956                 /* The ap now points to the string data of the %s */
3957                 str = va_arg(ap, const char *);
3958
3959                 /*
3960                  * If you hit this warning, it is likely that the
3961                  * trace event in question used %s on a string that
3962                  * was saved at the time of the event, but may not be
3963                  * around when the trace is read. Use __string(),
3964                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3965                  * instead. See samples/trace_events/trace-events-sample.h
3966                  * for reference.
3967                  */
3968                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3969                               "fmt: '%s' current_buffer: '%s'",
3970                               fmt, seq_buf_str(&iter->seq.seq))) {
3971                         int ret;
3972
3973                         /* Try to safely read the string */
3974                         if (star) {
3975                                 if (len + 1 > iter->fmt_size)
3976                                         len = iter->fmt_size - 1;
3977                                 if (len < 0)
3978                                         len = 0;
3979                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3980                                 iter->fmt[len] = 0;
3981                                 star = false;
3982                         } else {
3983                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3984                                                                   iter->fmt_size);
3985                         }
3986                         if (ret < 0)
3987                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3988                         else
3989                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3990                                                  str, iter->fmt);
3991                         str = "[UNSAFE-MEMORY]";
3992                         strcpy(iter->fmt, "%s");
3993                 } else {
3994                         strncpy(iter->fmt, p + i, j + 1);
3995                         iter->fmt[j+1] = '\0';
3996                 }
3997                 if (star)
3998                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3999                 else
4000                         trace_seq_printf(&iter->seq, iter->fmt, str);
4001
4002                 p += i + j + 1;
4003         }
4004  print:
4005         if (*p)
4006                 trace_seq_vprintf(&iter->seq, p, ap);
4007 }
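/*
 * Editor's sketch, not part of trace.c: the pattern the warning above
 * asks for.  An event that prints a string which may be freed before the
 * trace is read should copy it into the event with __string()/
 * __assign_str() rather than record the bare pointer.  This lives in a
 * trace header, not here; the event name and argument are hypothetical,
 * see samples/trace_events/trace-events-sample.h for the real reference.
 */
TRACE_EVENT(sample_event,
        TP_PROTO(const char *name),
        TP_ARGS(name),
        TP_STRUCT__entry(
                __string(name, name)            /* copy the string into the event */
        ),
        TP_fast_assign(
                __assign_str(name, name);
        ),
        TP_printk("name=%s", __get_str(name))   /* safe: data lives in the event */
);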
4008
4009 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4010 {
4011         const char *p, *new_fmt;
4012         char *q;
4013
4014         if (WARN_ON_ONCE(!fmt))
4015                 return fmt;
4016
4017         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4018                 return fmt;
4019
4020         p = fmt;
4021         new_fmt = q = iter->fmt;
4022         while (*p) {
4023                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4024                         if (!trace_iter_expand_format(iter))
4025                                 return fmt;
4026
4027                         q += iter->fmt - new_fmt;
4028                         new_fmt = iter->fmt;
4029                 }
4030
4031                 *q++ = *p++;
4032
4033                 /* Replace %p with %px */
4034                 if (p[-1] == '%') {
4035                         if (p[0] == '%') {
4036                                 *q++ = *p++;
4037                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4038                                 *q++ = *p++;
4039                                 *q++ = 'x';
4040                         }
4041                 }
4042         }
4043         *q = '\0';
4044
4045         return new_fmt;
4046 }
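/*
 * Editor's note, not part of trace.c: with the "hash-ptr" option clear,
 * a format such as
 *
 *      "dev=%s ptr=%p count=%d"
 *
 * is rewritten by the loop above into
 *
 *      "dev=%s ptr=%px count=%d"
 *
 * so the raw pointer value is printed, while "%%p" and conversions like
 * "%ps" are left untouched.
 */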
4047
4048 #define STATIC_TEMP_BUF_SIZE    128
4049 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4050
4051 /* Find the next real entry, without updating the iterator itself */
4052 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4053                                           int *ent_cpu, u64 *ent_ts)
4054 {
4055         /* __find_next_entry will reset ent_size */
4056         int ent_size = iter->ent_size;
4057         struct trace_entry *entry;
4058
4059         /*
4060          * If called from ftrace_dump(), then the iter->temp buffer
4061          * will be the static_temp_buf and not created from kmalloc.
4062          * If the entry size is greater than the buffer, we cannot
4063          * save it. Just return NULL in that case. This is only
4064          * used to add markers when two consecutive events' time
4065          * stamps have a large delta. See trace_print_lat_context().
4066          */
4067         if (iter->temp == static_temp_buf &&
4068             STATIC_TEMP_BUF_SIZE < ent_size)
4069                 return NULL;
4070
4071         /*
4072          * The __find_next_entry() may call peek_next_entry(), which may
4073          * call ring_buffer_peek() and make the contents of iter->ent
4074          * undefined. We need to copy iter->ent now.
4075          */
4076         if (iter->ent && iter->ent != iter->temp) {
4077                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4078                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4079                         void *temp;
4080                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4081                         if (!temp)
4082                                 return NULL;
4083                         kfree(iter->temp);
4084                         iter->temp = temp;
4085                         iter->temp_size = iter->ent_size;
4086                 }
4087                 memcpy(iter->temp, iter->ent, iter->ent_size);
4088                 iter->ent = iter->temp;
4089         }
4090         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4091         /* Put back the original ent_size */
4092         iter->ent_size = ent_size;
4093
4094         return entry;
4095 }
4096
4097 /* Find the next real entry, and increment the iterator to the next entry */
4098 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4099 {
4100         iter->ent = __find_next_entry(iter, &iter->cpu,
4101                                       &iter->lost_events, &iter->ts);
4102
4103         if (iter->ent)
4104                 trace_iterator_increment(iter);
4105
4106         return iter->ent ? iter : NULL;
4107 }
4108
4109 static void trace_consume(struct trace_iterator *iter)
4110 {
4111         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4112                             &iter->lost_events);
4113 }
4114
4115 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4116 {
4117         struct trace_iterator *iter = m->private;
4118         int i = (int)*pos;
4119         void *ent;
4120
4121         WARN_ON_ONCE(iter->leftover);
4122
4123         (*pos)++;
4124
4125         /* can't go backwards */
4126         if (iter->idx > i)
4127                 return NULL;
4128
4129         if (iter->idx < 0)
4130                 ent = trace_find_next_entry_inc(iter);
4131         else
4132                 ent = iter;
4133
4134         while (ent && iter->idx < i)
4135                 ent = trace_find_next_entry_inc(iter);
4136
4137         iter->pos = *pos;
4138
4139         return ent;
4140 }
4141
4142 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4143 {
4144         struct ring_buffer_iter *buf_iter;
4145         unsigned long entries = 0;
4146         u64 ts;
4147
4148         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4149
4150         buf_iter = trace_buffer_iter(iter, cpu);
4151         if (!buf_iter)
4152                 return;
4153
4154         ring_buffer_iter_reset(buf_iter);
4155
4156         /*
4157          * With the max latency tracers, a reset may never have taken
4158          * place on a CPU. This is evident when the timestamp is before
4159          * the start of the buffer.
4160          */
4161         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4162                 if (ts >= iter->array_buffer->time_start)
4163                         break;
4164                 entries++;
4165                 ring_buffer_iter_advance(buf_iter);
4166         }
4167
4168         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4169 }
4170
4171 /*
4172  * The current tracer is copied to avoid taking a global lock
4173  * all around.
4174  */
4175 static void *s_start(struct seq_file *m, loff_t *pos)
4176 {
4177         struct trace_iterator *iter = m->private;
4178         struct trace_array *tr = iter->tr;
4179         int cpu_file = iter->cpu_file;
4180         void *p = NULL;
4181         loff_t l = 0;
4182         int cpu;
4183
4184         mutex_lock(&trace_types_lock);
4185         if (unlikely(tr->current_trace != iter->trace)) {
4186                 /* Close iter->trace before switching to the new current tracer */
4187                 if (iter->trace->close)
4188                         iter->trace->close(iter);
4189                 iter->trace = tr->current_trace;
4190                 /* Reopen the new current tracer */
4191                 if (iter->trace->open)
4192                         iter->trace->open(iter);
4193         }
4194         mutex_unlock(&trace_types_lock);
4195
4196 #ifdef CONFIG_TRACER_MAX_TRACE
4197         if (iter->snapshot && iter->trace->use_max_tr)
4198                 return ERR_PTR(-EBUSY);
4199 #endif
4200
4201         if (*pos != iter->pos) {
4202                 iter->ent = NULL;
4203                 iter->cpu = 0;
4204                 iter->idx = -1;
4205
4206                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4207                         for_each_tracing_cpu(cpu)
4208                                 tracing_iter_reset(iter, cpu);
4209                 } else
4210                         tracing_iter_reset(iter, cpu_file);
4211
4212                 iter->leftover = 0;
4213                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4214                         ;
4215
4216         } else {
4217                 /*
4218                  * If we overflowed the seq_file before, then we want
4219                  * to just reuse the trace_seq buffer again.
4220                  */
4221                 if (iter->leftover)
4222                         p = iter;
4223                 else {
4224                         l = *pos - 1;
4225                         p = s_next(m, p, &l);
4226                 }
4227         }
4228
4229         trace_event_read_lock();
4230         trace_access_lock(cpu_file);
4231         return p;
4232 }
4233
4234 static void s_stop(struct seq_file *m, void *p)
4235 {
4236         struct trace_iterator *iter = m->private;
4237
4238 #ifdef CONFIG_TRACER_MAX_TRACE
4239         if (iter->snapshot && iter->trace->use_max_tr)
4240                 return;
4241 #endif
4242
4243         trace_access_unlock(iter->cpu_file);
4244         trace_event_read_unlock();
4245 }
4246
4247 static void
4248 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4249                       unsigned long *entries, int cpu)
4250 {
4251         unsigned long count;
4252
4253         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4254         /*
4255          * If this buffer has skipped entries, then we hold all
4256          * entries for the trace and we need to ignore the
4257          * ones before the time stamp.
4258          */
4259         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4260                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4261                 /* total is the same as the entries */
4262                 *total = count;
4263         } else
4264                 *total = count +
4265                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4266         *entries = count;
4267 }
4268
4269 static void
4270 get_total_entries(struct array_buffer *buf,
4271                   unsigned long *total, unsigned long *entries)
4272 {
4273         unsigned long t, e;
4274         int cpu;
4275
4276         *total = 0;
4277         *entries = 0;
4278
4279         for_each_tracing_cpu(cpu) {
4280                 get_total_entries_cpu(buf, &t, &e, cpu);
4281                 *total += t;
4282                 *entries += e;
4283         }
4284 }
4285
4286 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4287 {
4288         unsigned long total, entries;
4289
4290         if (!tr)
4291                 tr = &global_trace;
4292
4293         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4294
4295         return entries;
4296 }
4297
4298 unsigned long trace_total_entries(struct trace_array *tr)
4299 {
4300         unsigned long total, entries;
4301
4302         if (!tr)
4303                 tr = &global_trace;
4304
4305         get_total_entries(&tr->array_buffer, &total, &entries);
4306
4307         return entries;
4308 }
4309
4310 static void print_lat_help_header(struct seq_file *m)
4311 {
4312         seq_puts(m, "#                    _------=> CPU#            \n"
4313                     "#                   / _-----=> irqs-off/BH-disabled\n"
4314                     "#                  | / _----=> need-resched    \n"
4315                     "#                  || / _---=> hardirq/softirq \n"
4316                     "#                  ||| / _--=> preempt-depth   \n"
4317                     "#                  |||| / _-=> migrate-disable \n"
4318                     "#                  ||||| /     delay           \n"
4319                     "#  cmd     pid     |||||| time  |   caller     \n"
4320                     "#     \\   /        ||||||  \\    |    /       \n");
4321 }
4322
4323 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4324 {
4325         unsigned long total;
4326         unsigned long entries;
4327
4328         get_total_entries(buf, &total, &entries);
4329         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4330                    entries, total, num_online_cpus());
4331         seq_puts(m, "#\n");
4332 }
4333
4334 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4335                                    unsigned int flags)
4336 {
4337         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4338
4339         print_event_info(buf, m);
4340
4341         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4342         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4343 }
4344
4345 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4346                                        unsigned int flags)
4347 {
4348         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4349         static const char space[] = "            ";
4350         int prec = tgid ? 12 : 2;
4351
4352         print_event_info(buf, m);
4353
4354         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4355         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4356         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4357         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4358         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4359         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4360         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4361         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4362 }
4363
4364 void
4365 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4366 {
4367         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4368         struct array_buffer *buf = iter->array_buffer;
4369         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4370         struct tracer *type = iter->trace;
4371         unsigned long entries;
4372         unsigned long total;
4373         const char *name = type->name;
4374
4375         get_total_entries(buf, &total, &entries);
4376
4377         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4378                    name, UTS_RELEASE);
4379         seq_puts(m, "# -----------------------------------"
4380                  "---------------------------------\n");
4381         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4382                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4383                    nsecs_to_usecs(data->saved_latency),
4384                    entries,
4385                    total,
4386                    buf->cpu,
4387                    preempt_model_none()      ? "server" :
4388                    preempt_model_voluntary() ? "desktop" :
4389                    preempt_model_full()      ? "preempt" :
4390                    preempt_model_rt()        ? "preempt_rt" :
4391                    "unknown",
4392                    /* These are reserved for later use */
4393                    0, 0, 0, 0);
4394 #ifdef CONFIG_SMP
4395         seq_printf(m, " #P:%d)\n", num_online_cpus());
4396 #else
4397         seq_puts(m, ")\n");
4398 #endif
4399         seq_puts(m, "#    -----------------\n");
4400         seq_printf(m, "#    | task: %.16s-%d "
4401                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4402                    data->comm, data->pid,
4403                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4404                    data->policy, data->rt_priority);
4405         seq_puts(m, "#    -----------------\n");
4406
4407         if (data->critical_start) {
4408                 seq_puts(m, "#  => started at: ");
4409                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4410                 trace_print_seq(m, &iter->seq);
4411                 seq_puts(m, "\n#  => ended at:   ");
4412                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4413                 trace_print_seq(m, &iter->seq);
4414                 seq_puts(m, "\n#\n");
4415         }
4416
4417         seq_puts(m, "#\n");
4418 }
4419
4420 static void test_cpu_buff_start(struct trace_iterator *iter)
4421 {
4422         struct trace_seq *s = &iter->seq;
4423         struct trace_array *tr = iter->tr;
4424
4425         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4426                 return;
4427
4428         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4429                 return;
4430
4431         if (cpumask_available(iter->started) &&
4432             cpumask_test_cpu(iter->cpu, iter->started))
4433                 return;
4434
4435         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4436                 return;
4437
4438         if (cpumask_available(iter->started))
4439                 cpumask_set_cpu(iter->cpu, iter->started);
4440
4441         /* Don't print started cpu buffer for the first entry of the trace */
4442         if (iter->idx > 1)
4443                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4444                                 iter->cpu);
4445 }
4446
4447 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4448 {
4449         struct trace_array *tr = iter->tr;
4450         struct trace_seq *s = &iter->seq;
4451         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4452         struct trace_entry *entry;
4453         struct trace_event *event;
4454
4455         entry = iter->ent;
4456
4457         test_cpu_buff_start(iter);
4458
4459         event = ftrace_find_event(entry->type);
4460
4461         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4462                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4463                         trace_print_lat_context(iter);
4464                 else
4465                         trace_print_context(iter);
4466         }
4467
4468         if (trace_seq_has_overflowed(s))
4469                 return TRACE_TYPE_PARTIAL_LINE;
4470
4471         if (event) {
4472                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4473                         return print_event_fields(iter, event);
4474                 return event->funcs->trace(iter, sym_flags, event);
4475         }
4476
4477         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4478
4479         return trace_handle_return(s);
4480 }
4481
4482 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4483 {
4484         struct trace_array *tr = iter->tr;
4485         struct trace_seq *s = &iter->seq;
4486         struct trace_entry *entry;
4487         struct trace_event *event;
4488
4489         entry = iter->ent;
4490
4491         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4492                 trace_seq_printf(s, "%d %d %llu ",
4493                                  entry->pid, iter->cpu, iter->ts);
4494
4495         if (trace_seq_has_overflowed(s))
4496                 return TRACE_TYPE_PARTIAL_LINE;
4497
4498         event = ftrace_find_event(entry->type);
4499         if (event)
4500                 return event->funcs->raw(iter, 0, event);
4501
4502         trace_seq_printf(s, "%d ?\n", entry->type);
4503
4504         return trace_handle_return(s);
4505 }
4506
4507 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4508 {
4509         struct trace_array *tr = iter->tr;
4510         struct trace_seq *s = &iter->seq;
4511         unsigned char newline = '\n';
4512         struct trace_entry *entry;
4513         struct trace_event *event;
4514
4515         entry = iter->ent;
4516
4517         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4518                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4519                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4520                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4521                 if (trace_seq_has_overflowed(s))
4522                         return TRACE_TYPE_PARTIAL_LINE;
4523         }
4524
4525         event = ftrace_find_event(entry->type);
4526         if (event) {
4527                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4528                 if (ret != TRACE_TYPE_HANDLED)
4529                         return ret;
4530         }
4531
4532         SEQ_PUT_FIELD(s, newline);
4533
4534         return trace_handle_return(s);
4535 }
4536
4537 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4538 {
4539         struct trace_array *tr = iter->tr;
4540         struct trace_seq *s = &iter->seq;
4541         struct trace_entry *entry;
4542         struct trace_event *event;
4543
4544         entry = iter->ent;
4545
4546         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4547                 SEQ_PUT_FIELD(s, entry->pid);
4548                 SEQ_PUT_FIELD(s, iter->cpu);
4549                 SEQ_PUT_FIELD(s, iter->ts);
4550                 if (trace_seq_has_overflowed(s))
4551                         return TRACE_TYPE_PARTIAL_LINE;
4552         }
4553
4554         event = ftrace_find_event(entry->type);
4555         return event ? event->funcs->binary(iter, 0, event) :
4556                 TRACE_TYPE_HANDLED;
4557 }
4558
4559 int trace_empty(struct trace_iterator *iter)
4560 {
4561         struct ring_buffer_iter *buf_iter;
4562         int cpu;
4563
4564         /* If we are looking at one CPU buffer, only check that one */
4565         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4566                 cpu = iter->cpu_file;
4567                 buf_iter = trace_buffer_iter(iter, cpu);
4568                 if (buf_iter) {
4569                         if (!ring_buffer_iter_empty(buf_iter))
4570                                 return 0;
4571                 } else {
4572                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4573                                 return 0;
4574                 }
4575                 return 1;
4576         }
4577
4578         for_each_tracing_cpu(cpu) {
4579                 buf_iter = trace_buffer_iter(iter, cpu);
4580                 if (buf_iter) {
4581                         if (!ring_buffer_iter_empty(buf_iter))
4582                                 return 0;
4583                 } else {
4584                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4585                                 return 0;
4586                 }
4587         }
4588
4589         return 1;
4590 }
4591
4592 /*  Called with trace_event_read_lock() held. */
4593 enum print_line_t print_trace_line(struct trace_iterator *iter)
4594 {
4595         struct trace_array *tr = iter->tr;
4596         unsigned long trace_flags = tr->trace_flags;
4597         enum print_line_t ret;
4598
4599         if (iter->lost_events) {
4600                 if (iter->lost_events == (unsigned long)-1)
4601                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4602                                          iter->cpu);
4603                 else
4604                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4605                                          iter->cpu, iter->lost_events);
4606                 if (trace_seq_has_overflowed(&iter->seq))
4607                         return TRACE_TYPE_PARTIAL_LINE;
4608         }
4609
4610         if (iter->trace && iter->trace->print_line) {
4611                 ret = iter->trace->print_line(iter);
4612                 if (ret != TRACE_TYPE_UNHANDLED)
4613                         return ret;
4614         }
4615
4616         if (iter->ent->type == TRACE_BPUTS &&
4617                         trace_flags & TRACE_ITER_PRINTK &&
4618                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4619                 return trace_print_bputs_msg_only(iter);
4620
4621         if (iter->ent->type == TRACE_BPRINT &&
4622                         trace_flags & TRACE_ITER_PRINTK &&
4623                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4624                 return trace_print_bprintk_msg_only(iter);
4625
4626         if (iter->ent->type == TRACE_PRINT &&
4627                         trace_flags & TRACE_ITER_PRINTK &&
4628                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4629                 return trace_print_printk_msg_only(iter);
4630
4631         if (trace_flags & TRACE_ITER_BIN)
4632                 return print_bin_fmt(iter);
4633
4634         if (trace_flags & TRACE_ITER_HEX)
4635                 return print_hex_fmt(iter);
4636
4637         if (trace_flags & TRACE_ITER_RAW)
4638                 return print_raw_fmt(iter);
4639
4640         return print_trace_fmt(iter);
4641 }
4642
4643 void trace_latency_header(struct seq_file *m)
4644 {
4645         struct trace_iterator *iter = m->private;
4646         struct trace_array *tr = iter->tr;
4647
4648         /* print nothing if the buffers are empty */
4649         if (trace_empty(iter))
4650                 return;
4651
4652         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4653                 print_trace_header(m, iter);
4654
4655         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4656                 print_lat_help_header(m);
4657 }
4658
4659 void trace_default_header(struct seq_file *m)
4660 {
4661         struct trace_iterator *iter = m->private;
4662         struct trace_array *tr = iter->tr;
4663         unsigned long trace_flags = tr->trace_flags;
4664
4665         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4666                 return;
4667
4668         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4669                 /* print nothing if the buffers are empty */
4670                 if (trace_empty(iter))
4671                         return;
4672                 print_trace_header(m, iter);
4673                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4674                         print_lat_help_header(m);
4675         } else {
4676                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4677                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4678                                 print_func_help_header_irq(iter->array_buffer,
4679                                                            m, trace_flags);
4680                         else
4681                                 print_func_help_header(iter->array_buffer, m,
4682                                                        trace_flags);
4683                 }
4684         }
4685 }
4686
4687 static void test_ftrace_alive(struct seq_file *m)
4688 {
4689         if (!ftrace_is_dead())
4690                 return;
4691         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4692                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4693 }
4694
4695 #ifdef CONFIG_TRACER_MAX_TRACE
4696 static void show_snapshot_main_help(struct seq_file *m)
4697 {
4698         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4699                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4700                     "#                      Takes a snapshot of the main buffer.\n"
4701                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4702                     "#                      (Doesn't have to be '2'; works with any number that\n"
4703                     "#                       is not a '0' or '1')\n");
4704 }
4705
4706 static void show_snapshot_percpu_help(struct seq_file *m)
4707 {
4708         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4709 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4710         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4711                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4712 #else
4713         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4714                     "#                     Must use main snapshot file to allocate.\n");
4715 #endif
4716         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4717                     "#                      (Doesn't have to be '2'; works with any number that\n"
4718                     "#                       is not a '0' or '1')\n");
4719 }
4720
4721 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4722 {
4723         if (iter->tr->allocated_snapshot)
4724                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4725         else
4726                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4727
4728         seq_puts(m, "# Snapshot commands:\n");
4729         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4730                 show_snapshot_main_help(m);
4731         else
4732                 show_snapshot_percpu_help(m);
4733 }
4734 #else
4735 /* Should never be called */
4736 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4737 #endif
4738
4739 static int s_show(struct seq_file *m, void *v)
4740 {
4741         struct trace_iterator *iter = v;
4742         int ret;
4743
4744         if (iter->ent == NULL) {
4745                 if (iter->tr) {
4746                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4747                         seq_puts(m, "#\n");
4748                         test_ftrace_alive(m);
4749                 }
4750                 if (iter->snapshot && trace_empty(iter))
4751                         print_snapshot_help(m, iter);
4752                 else if (iter->trace && iter->trace->print_header)
4753                         iter->trace->print_header(m);
4754                 else
4755                         trace_default_header(m);
4756
4757         } else if (iter->leftover) {
4758                 /*
4759                  * If we filled the seq_file buffer earlier, we
4760                  * want to just show it now.
4761                  */
4762                 ret = trace_print_seq(m, &iter->seq);
4763
4764                 /* ret should this time be zero, but you never know */
4765                 iter->leftover = ret;
4766
4767         } else {
4768                 print_trace_line(iter);
4769                 ret = trace_print_seq(m, &iter->seq);
4770                 /*
4771                  * If we overflow the seq_file buffer, then it will
4772                  * ask us for this data again at start up.
4773                  * Use that instead.
4774                  *  ret is 0 if seq_file write succeeded.
4775                  *        -1 otherwise.
4776                  */
4777                 iter->leftover = ret;
4778         }
4779
4780         return 0;
4781 }
4782
4783 /*
4784  * Should be used after trace_array_get(), trace_types_lock
4785  * ensures that i_cdev was already initialized.
4786  */
4787 static inline int tracing_get_cpu(struct inode *inode)
4788 {
4789         if (inode->i_cdev) /* See trace_create_cpu_file() */
4790                 return (long)inode->i_cdev - 1;
4791         return RING_BUFFER_ALL_CPUS;
4792 }
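/*
 * A sketch of the encoding this relies on: trace_create_cpu_file()
 * stores cpu + 1 in i_cdev, so a NULL i_cdev (the top-level, non
 * per-cpu files) never collides with CPU 0.  Subtracting one here
 * recovers the CPU number, and NULL maps to RING_BUFFER_ALL_CPUS.
 */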
4793
4794 static const struct seq_operations tracer_seq_ops = {
4795         .start          = s_start,
4796         .next           = s_next,
4797         .stop           = s_stop,
4798         .show           = s_show,
4799 };
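/*
 * These callbacks implement the standard seq_file protocol for reads
 * of the "trace" file: the seq_file core calls ->start() to position
 * the iterator, then alternates ->show() and ->next() for each entry
 * until the user buffer is full or the trace is exhausted, and
 * finally calls ->stop().
 */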
4800
4801 /*
4802  * Note, as iter itself can be allocated and freed in different
4803  * ways, this function is only used to free its content, and not
4804  * the iterator itself. The only requirement to all the allocations
4805  * the iterator itself. The only requirement on all the allocations
4806  * is that they zero all fields (kzalloc), as freeing works with
4807  * either allocated content or NULL.
4808 static void free_trace_iter_content(struct trace_iterator *iter)
4809 {
4810         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4811         if (iter->fmt != static_fmt_buf)
4812                 kfree(iter->fmt);
4813
4814         kfree(iter->temp);
4815         kfree(iter->buffer_iter);
4816         mutex_destroy(&iter->mutex);
4817         free_cpumask_var(iter->started);
4818 }
4819
4820 static struct trace_iterator *
4821 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4822 {
4823         struct trace_array *tr = inode->i_private;
4824         struct trace_iterator *iter;
4825         int cpu;
4826
4827         if (tracing_disabled)
4828                 return ERR_PTR(-ENODEV);
4829
4830         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4831         if (!iter)
4832                 return ERR_PTR(-ENOMEM);
4833
4834         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4835                                     GFP_KERNEL);
4836         if (!iter->buffer_iter)
4837                 goto release;
4838
4839         /*
4840          * trace_find_next_entry() may need to save off iter->ent.
4841          * It will place it into the iter->temp buffer. As most
4842          * events are less than 128 bytes, allocate a buffer of that size.
4843          * If one is greater, then trace_find_next_entry() will
4844          * allocate a new buffer to adjust for the bigger iter->ent.
4845          * It's not critical if it fails to get allocated here.
4846          */
4847         iter->temp = kmalloc(128, GFP_KERNEL);
4848         if (iter->temp)
4849                 iter->temp_size = 128;
4850
4851         /*
4852          * trace_event_printf() may need to modify given format
4853          * string to replace %p with %px so that it shows real address
4854          * instead of hash value. However, that is only for the event
4855          * instead of a hashed value. However, that is only needed for event
4856          * tracing; other tracers may not need it. Defer the allocation
4857          */
4858         iter->fmt = NULL;
4859         iter->fmt_size = 0;
4860
4861         mutex_lock(&trace_types_lock);
4862         iter->trace = tr->current_trace;
4863
4864         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4865                 goto fail;
4866
4867         iter->tr = tr;
4868
4869 #ifdef CONFIG_TRACER_MAX_TRACE
4870         /* Currently only the top directory has a snapshot */
4871         if (tr->current_trace->print_max || snapshot)
4872                 iter->array_buffer = &tr->max_buffer;
4873         else
4874 #endif
4875                 iter->array_buffer = &tr->array_buffer;
4876         iter->snapshot = snapshot;
4877         iter->pos = -1;
4878         iter->cpu_file = tracing_get_cpu(inode);
4879         mutex_init(&iter->mutex);
4880
4881         /* Notify the tracer early; before we stop tracing. */
4882         if (iter->trace->open)
4883                 iter->trace->open(iter);
4884
4885         /* Annotate start of buffers if we had overruns */
4886         if (ring_buffer_overruns(iter->array_buffer->buffer))
4887                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4888
4889         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4890         if (trace_clocks[tr->clock_id].in_ns)
4891                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4892
4893         /*
4894          * If pause-on-trace is enabled, then stop the trace while
4895          * dumping, unless this is the "snapshot" file
4896          */
4897         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4898                 tracing_stop_tr(tr);
4899
4900         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4901                 for_each_tracing_cpu(cpu) {
4902                         iter->buffer_iter[cpu] =
4903                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4904                                                          cpu, GFP_KERNEL);
4905                 }
4906                 ring_buffer_read_prepare_sync();
4907                 for_each_tracing_cpu(cpu) {
4908                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4909                         tracing_iter_reset(iter, cpu);
4910                 }
4911         } else {
4912                 cpu = iter->cpu_file;
4913                 iter->buffer_iter[cpu] =
4914                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4915                                                  cpu, GFP_KERNEL);
4916                 ring_buffer_read_prepare_sync();
4917                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4918                 tracing_iter_reset(iter, cpu);
4919         }
4920
4921         mutex_unlock(&trace_types_lock);
4922
4923         return iter;
4924
4925  fail:
4926         mutex_unlock(&trace_types_lock);
4927         free_trace_iter_content(iter);
4928 release:
4929         seq_release_private(inode, file);
4930         return ERR_PTR(-ENOMEM);
4931 }
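/*
 * The iterator built above is torn down by tracing_release() below:
 * it finishes the per-cpu ring buffer readers, calls the tracer's
 * ->close() callback, restarts tracing if it was stopped here, and
 * frees the iterator contents.
 */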
4932
4933 int tracing_open_generic(struct inode *inode, struct file *filp)
4934 {
4935         int ret;
4936
4937         ret = tracing_check_open_get_tr(NULL);
4938         if (ret)
4939                 return ret;
4940
4941         filp->private_data = inode->i_private;
4942         return 0;
4943 }
4944
4945 bool tracing_is_disabled(void)
4946 {
4947         return (tracing_disabled) ? true : false;
4948 }
4949
4950 /*
4951  * Open and update trace_array ref count.
4952  * Must have the current trace_array passed to it.
4953  */
4954 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4955 {
4956         struct trace_array *tr = inode->i_private;
4957         int ret;
4958
4959         ret = tracing_check_open_get_tr(tr);
4960         if (ret)
4961                 return ret;
4962
4963         filp->private_data = inode->i_private;
4964
4965         return 0;
4966 }
4967
4968 /*
4969  * The private pointer of the inode is the trace_event_file.
4970  * Update the tr ref count associated with it.
4971  */
4972 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4973 {
4974         struct trace_event_file *file = inode->i_private;
4975         int ret;
4976
4977         ret = tracing_check_open_get_tr(file->tr);
4978         if (ret)
4979                 return ret;
4980
4981         mutex_lock(&event_mutex);
4982
4983         /* Fail if the file is marked for removal */
4984         if (file->flags & EVENT_FILE_FL_FREED) {
4985                 trace_array_put(file->tr);
4986                 ret = -ENODEV;
4987         } else {
4988                 event_file_get(file);
4989         }
4990
4991         mutex_unlock(&event_mutex);
4992         if (ret)
4993                 return ret;
4994
4995         filp->private_data = inode->i_private;
4996
4997         return 0;
4998 }
4999
5000 int tracing_release_file_tr(struct inode *inode, struct file *filp)
5001 {
5002         struct trace_event_file *file = inode->i_private;
5003
5004         trace_array_put(file->tr);
5005         event_file_put(file);
5006
5007         return 0;
5008 }
5009
5010 static int tracing_mark_open(struct inode *inode, struct file *filp)
5011 {
5012         stream_open(inode, filp);
5013         return tracing_open_generic_tr(inode, filp);
5014 }
5015
5016 static int tracing_release(struct inode *inode, struct file *file)
5017 {
5018         struct trace_array *tr = inode->i_private;
5019         struct seq_file *m = file->private_data;
5020         struct trace_iterator *iter;
5021         int cpu;
5022
5023         if (!(file->f_mode & FMODE_READ)) {
5024                 trace_array_put(tr);
5025                 return 0;
5026         }
5027
5028         /* Writes do not use seq_file */
5029         iter = m->private;
5030         mutex_lock(&trace_types_lock);
5031
5032         for_each_tracing_cpu(cpu) {
5033                 if (iter->buffer_iter[cpu])
5034                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5035         }
5036
5037         if (iter->trace && iter->trace->close)
5038                 iter->trace->close(iter);
5039
5040         if (!iter->snapshot && tr->stop_count)
5041                 /* reenable tracing if it was previously enabled */
5042                 tracing_start_tr(tr);
5043
5044         __trace_array_put(tr);
5045
5046         mutex_unlock(&trace_types_lock);
5047
5048         free_trace_iter_content(iter);
5049         seq_release_private(inode, file);
5050
5051         return 0;
5052 }
5053
5054 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5055 {
5056         struct trace_array *tr = inode->i_private;
5057
5058         trace_array_put(tr);
5059         return 0;
5060 }
5061
5062 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5063 {
5064         struct trace_array *tr = inode->i_private;
5065
5066         trace_array_put(tr);
5067
5068         return single_release(inode, file);
5069 }
5070
5071 static int tracing_open(struct inode *inode, struct file *file)
5072 {
5073         struct trace_array *tr = inode->i_private;
5074         struct trace_iterator *iter;
5075         int ret;
5076
5077         ret = tracing_check_open_get_tr(tr);
5078         if (ret)
5079                 return ret;
5080
5081         /* If this file was open for write, then erase contents */
5082         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5083                 int cpu = tracing_get_cpu(inode);
5084                 struct array_buffer *trace_buf = &tr->array_buffer;
5085
5086 #ifdef CONFIG_TRACER_MAX_TRACE
5087                 if (tr->current_trace->print_max)
5088                         trace_buf = &tr->max_buffer;
5089 #endif
5090
5091                 if (cpu == RING_BUFFER_ALL_CPUS)
5092                         tracing_reset_online_cpus(trace_buf);
5093                 else
5094                         tracing_reset_cpu(trace_buf, cpu);
5095         }
5096
5097         if (file->f_mode & FMODE_READ) {
5098                 iter = __tracing_open(inode, file, false);
5099                 if (IS_ERR(iter))
5100                         ret = PTR_ERR(iter);
5101                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5102                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5103         }
5104
5105         if (ret < 0)
5106                 trace_array_put(tr);
5107
5108         return ret;
5109 }
5110
5111 /*
5112  * Some tracers are not suitable for instance buffers.
5113  * A tracer is always available for the global array (toplevel)
5114  * or if it explicitly states that it is.
5115  */
5116 static bool
5117 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5118 {
5119         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5120 }
5121
5122 /* Find the next tracer that this trace array may use */
5123 static struct tracer *
5124 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5125 {
5126         while (t && !trace_ok_for_array(t, tr))
5127                 t = t->next;
5128
5129         return t;
5130 }
5131
5132 static void *
5133 t_next(struct seq_file *m, void *v, loff_t *pos)
5134 {
5135         struct trace_array *tr = m->private;
5136         struct tracer *t = v;
5137
5138         (*pos)++;
5139
5140         if (t)
5141                 t = get_tracer_for_array(tr, t->next);
5142
5143         return t;
5144 }
5145
5146 static void *t_start(struct seq_file *m, loff_t *pos)
5147 {
5148         struct trace_array *tr = m->private;
5149         struct tracer *t;
5150         loff_t l = 0;
5151
5152         mutex_lock(&trace_types_lock);
5153
5154         t = get_tracer_for_array(tr, trace_types);
5155         for (; t && l < *pos; t = t_next(m, t, &l))
5156                         ;
5157
5158         return t;
5159 }
5160
5161 static void t_stop(struct seq_file *m, void *p)
5162 {
5163         mutex_unlock(&trace_types_lock);
5164 }
5165
5166 static int t_show(struct seq_file *m, void *v)
5167 {
5168         struct tracer *t = v;
5169
5170         if (!t)
5171                 return 0;
5172
5173         seq_puts(m, t->name);
5174         if (t->next)
5175                 seq_putc(m, ' ');
5176         else
5177                 seq_putc(m, '\n');
5178
5179         return 0;
5180 }
5181
5182 static const struct seq_operations show_traces_seq_ops = {
5183         .start          = t_start,
5184         .next           = t_next,
5185         .stop           = t_stop,
5186         .show           = t_show,
5187 };
5188
5189 static int show_traces_open(struct inode *inode, struct file *file)
5190 {
5191         struct trace_array *tr = inode->i_private;
5192         struct seq_file *m;
5193         int ret;
5194
5195         ret = tracing_check_open_get_tr(tr);
5196         if (ret)
5197                 return ret;
5198
5199         ret = seq_open(file, &show_traces_seq_ops);
5200         if (ret) {
5201                 trace_array_put(tr);
5202                 return ret;
5203         }
5204
5205         m = file->private_data;
5206         m->private = tr;
5207
5208         return 0;
5209 }
5210
5211 static int show_traces_release(struct inode *inode, struct file *file)
5212 {
5213         struct trace_array *tr = inode->i_private;
5214
5215         trace_array_put(tr);
5216         return seq_release(inode, file);
5217 }
5218
5219 static ssize_t
5220 tracing_write_stub(struct file *filp, const char __user *ubuf,
5221                    size_t count, loff_t *ppos)
5222 {
5223         return count;
5224 }
5225
5226 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5227 {
5228         int ret;
5229
5230         if (file->f_mode & FMODE_READ)
5231                 ret = seq_lseek(file, offset, whence);
5232         else
5233                 file->f_pos = ret = 0;
5234
5235         return ret;
5236 }
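/*
 * Only files opened for reading are backed by a seq_file, so only
 * they get a real llseek; write-only opens simply have their position
 * reset to zero.
 */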
5237
5238 static const struct file_operations tracing_fops = {
5239         .open           = tracing_open,
5240         .read           = seq_read,
5241         .read_iter      = seq_read_iter,
5242         .splice_read    = copy_splice_read,
5243         .write          = tracing_write_stub,
5244         .llseek         = tracing_lseek,
5245         .release        = tracing_release,
5246 };
5247
5248 static const struct file_operations show_traces_fops = {
5249         .open           = show_traces_open,
5250         .read           = seq_read,
5251         .llseek         = seq_lseek,
5252         .release        = show_traces_release,
5253 };
5254
5255 static ssize_t
5256 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5257                      size_t count, loff_t *ppos)
5258 {
5259         struct trace_array *tr = file_inode(filp)->i_private;
5260         char *mask_str;
5261         int len;
5262
5263         len = snprintf(NULL, 0, "%*pb\n",
5264                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5265         mask_str = kmalloc(len, GFP_KERNEL);
5266         if (!mask_str)
5267                 return -ENOMEM;
5268
5269         len = snprintf(mask_str, len, "%*pb\n",
5270                        cpumask_pr_args(tr->tracing_cpumask));
5271         if (len >= count) {
5272                 count = -EINVAL;
5273                 goto out_err;
5274         }
5275         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5276
5277 out_err:
5278         kfree(mask_str);
5279
5280         return count;
5281 }
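/*
 * The mask is emitted with the "%*pb" bitmap format, so reading
 * tracing_cpumask yields a hex bitmap of the traced CPUs, e.g. "ff"
 * on an 8-CPU machine with all CPUs enabled.
 */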
5282
5283 int tracing_set_cpumask(struct trace_array *tr,
5284                         cpumask_var_t tracing_cpumask_new)
5285 {
5286         int cpu;
5287
5288         if (!tr)
5289                 return -EINVAL;
5290
5291         local_irq_disable();
5292         arch_spin_lock(&tr->max_lock);
5293         for_each_tracing_cpu(cpu) {
5294                 /*
5295                  * Increase/decrease the disabled counter if we are
5296                  * about to flip a bit in the cpumask:
5297                  */
5298                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5299                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5300                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5301                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5302 #ifdef CONFIG_TRACER_MAX_TRACE
5303                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5304 #endif
5305                 }
5306                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5307                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5308                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5309                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5310 #ifdef CONFIG_TRACER_MAX_TRACE
5311                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5312 #endif
5313                 }
5314         }
5315         arch_spin_unlock(&tr->max_lock);
5316         local_irq_enable();
5317
5318         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5319
5320         return 0;
5321 }
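/*
 * Clearing a CPU from the mask bumps that CPU's "disabled" counter
 * and stops ring buffer recording for it (setting a CPU does the
 * reverse), so events from CPUs outside the mask are dropped at the
 * source.  The update runs with IRQs off under tr->max_lock, which
 * also serializes max-latency buffer swaps.
 */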
5322
5323 static ssize_t
5324 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5325                       size_t count, loff_t *ppos)
5326 {
5327         struct trace_array *tr = file_inode(filp)->i_private;
5328         cpumask_var_t tracing_cpumask_new;
5329         int err;
5330
5331         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5332                 return -ENOMEM;
5333
5334         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5335         if (err)
5336                 goto err_free;
5337
5338         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5339         if (err)
5340                 goto err_free;
5341
5342         free_cpumask_var(tracing_cpumask_new);
5343
5344         return count;
5345
5346 err_free:
5347         free_cpumask_var(tracing_cpumask_new);
5348
5349         return err;
5350 }
5351
5352 static const struct file_operations tracing_cpumask_fops = {
5353         .open           = tracing_open_generic_tr,
5354         .read           = tracing_cpumask_read,
5355         .write          = tracing_cpumask_write,
5356         .release        = tracing_release_generic_tr,
5357         .llseek         = generic_file_llseek,
5358 };
5359
5360 static int tracing_trace_options_show(struct seq_file *m, void *v)
5361 {
5362         struct tracer_opt *trace_opts;
5363         struct trace_array *tr = m->private;
5364         u32 tracer_flags;
5365         int i;
5366
5367         mutex_lock(&trace_types_lock);
5368         tracer_flags = tr->current_trace->flags->val;
5369         trace_opts = tr->current_trace->flags->opts;
5370
5371         for (i = 0; trace_options[i]; i++) {
5372                 if (tr->trace_flags & (1 << i))
5373                         seq_printf(m, "%s\n", trace_options[i]);
5374                 else
5375                         seq_printf(m, "no%s\n", trace_options[i]);
5376         }
5377
5378         for (i = 0; trace_opts[i].name; i++) {
5379                 if (tracer_flags & trace_opts[i].bit)
5380                         seq_printf(m, "%s\n", trace_opts[i].name);
5381                 else
5382                         seq_printf(m, "no%s\n", trace_opts[i].name);
5383         }
5384         mutex_unlock(&trace_types_lock);
5385
5386         return 0;
5387 }
5388
5389 static int __set_tracer_option(struct trace_array *tr,
5390                                struct tracer_flags *tracer_flags,
5391                                struct tracer_opt *opts, int neg)
5392 {
5393         struct tracer *trace = tracer_flags->trace;
5394         int ret;
5395
5396         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5397         if (ret)
5398                 return ret;
5399
5400         if (neg)
5401                 tracer_flags->val &= ~opts->bit;
5402         else
5403                 tracer_flags->val |= opts->bit;
5404         return 0;
5405 }
5406
5407 /* Try to assign a tracer specific option */
5408 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5409 {
5410         struct tracer *trace = tr->current_trace;
5411         struct tracer_flags *tracer_flags = trace->flags;
5412         struct tracer_opt *opts = NULL;
5413         int i;
5414
5415         for (i = 0; tracer_flags->opts[i].name; i++) {
5416                 opts = &tracer_flags->opts[i];
5417
5418                 if (strcmp(cmp, opts->name) == 0)
5419                         return __set_tracer_option(tr, trace->flags, opts, neg);
5420         }
5421
5422         return -EINVAL;
5423 }
5424
5425 /* Some tracers require overwrite to stay enabled */
5426 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5427 {
5428         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5429                 return -1;
5430
5431         return 0;
5432 }
5433
5434 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5435 {
5436         int *map;
5437
5438         if ((mask == TRACE_ITER_RECORD_TGID) ||
5439             (mask == TRACE_ITER_RECORD_CMD))
5440                 lockdep_assert_held(&event_mutex);
5441
5442         /* do nothing if flag is already set */
5443         if (!!(tr->trace_flags & mask) == !!enabled)
5444                 return 0;
5445
5446         /* Give the tracer a chance to approve the change */
5447         if (tr->current_trace->flag_changed)
5448                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5449                         return -EINVAL;
5450
5451         if (enabled)
5452                 tr->trace_flags |= mask;
5453         else
5454                 tr->trace_flags &= ~mask;
5455
5456         if (mask == TRACE_ITER_RECORD_CMD)
5457                 trace_event_enable_cmd_record(enabled);
5458
5459         if (mask == TRACE_ITER_RECORD_TGID) {
5460                 if (!tgid_map) {
5461                         tgid_map_max = pid_max;
5462                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5463                                        GFP_KERNEL);
5464
5465                         /*
5466                          * Pairs with smp_load_acquire() in
5467                          * trace_find_tgid_ptr() to ensure that if it observes
5468                          * the tgid_map we just allocated then it also observes
5469                          * the corresponding tgid_map_max value.
5470                          */
5471                         smp_store_release(&tgid_map, map);
5472                 }
5473                 if (!tgid_map) {
5474                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5475                         return -ENOMEM;
5476                 }
5477
5478                 trace_event_enable_tgid_record(enabled);
5479         }
5480
5481         if (mask == TRACE_ITER_EVENT_FORK)
5482                 trace_event_follow_fork(tr, enabled);
5483
5484         if (mask == TRACE_ITER_FUNC_FORK)
5485                 ftrace_pid_follow_fork(tr, enabled);
5486
5487         if (mask == TRACE_ITER_OVERWRITE) {
5488                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5489 #ifdef CONFIG_TRACER_MAX_TRACE
5490                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5491 #endif
5492         }
5493
5494         if (mask == TRACE_ITER_PRINTK) {
5495                 trace_printk_start_stop_comm(enabled);
5496                 trace_printk_control(enabled);
5497         }
5498
5499         return 0;
5500 }
5501
5502 int trace_set_options(struct trace_array *tr, char *option)
5503 {
5504         char *cmp;
5505         int neg = 0;
5506         int ret;
5507         size_t orig_len = strlen(option);
5508         int len;
5509
5510         cmp = strstrip(option);
5511
5512         len = str_has_prefix(cmp, "no");
5513         if (len)
5514                 neg = 1;
5515
5516         cmp += len;
5517
5518         mutex_lock(&event_mutex);
5519         mutex_lock(&trace_types_lock);
5520
5521         ret = match_string(trace_options, -1, cmp);
5522         /* If the option is not a core trace flag, test the tracer-specific options */
5523         if (ret < 0)
5524                 ret = set_tracer_option(tr, cmp, neg);
5525         else
5526                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5527
5528         mutex_unlock(&trace_types_lock);
5529         mutex_unlock(&event_mutex);
5530
5531         /*
5532          * If the first trailing whitespace is replaced with '\0' by strstrip,
5533          * turn it back into a space.
5534          */
5535         if (orig_len > strlen(option))
5536                 option[strlen(option)] = ' ';
5537
5538         return ret;
5539 }
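/*
 * A leading "no" negates an option: for example, writing
 * "print-parent" to trace_options sets the print-parent flag while
 * "noprint-parent" clears it.  Names that do not match a core flag
 * fall through to the current tracer's own option flags.
 */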
5540
5541 static void __init apply_trace_boot_options(void)
5542 {
5543         char *buf = trace_boot_options_buf;
5544         char *option;
5545
5546         while (true) {
5547                 option = strsep(&buf, ",");
5548
5549                 if (!option)
5550                         break;
5551
5552                 if (*option)
5553                         trace_set_options(&global_trace, option);
5554
5555                 /* Put back the comma to allow this to be called again */
5556                 if (buf)
5557                         *(buf - 1) = ',';
5558         }
5559 }
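/*
 * strsep() terminates each token by overwriting the ',' with '\0',
 * which is why the comma is put back above: the boot options buffer
 * can then be walked again on a later call.
 */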
5560
5561 static ssize_t
5562 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5563                         size_t cnt, loff_t *ppos)
5564 {
5565         struct seq_file *m = filp->private_data;
5566         struct trace_array *tr = m->private;
5567         char buf[64];
5568         int ret;
5569
5570         if (cnt >= sizeof(buf))
5571                 return -EINVAL;
5572
5573         if (copy_from_user(buf, ubuf, cnt))
5574                 return -EFAULT;
5575
5576         buf[cnt] = 0;
5577
5578         ret = trace_set_options(tr, buf);
5579         if (ret < 0)
5580                 return ret;
5581
5582         *ppos += cnt;
5583
5584         return cnt;
5585 }
5586
5587 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5588 {
5589         struct trace_array *tr = inode->i_private;
5590         int ret;
5591
5592         ret = tracing_check_open_get_tr(tr);
5593         if (ret)
5594                 return ret;
5595
5596         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5597         if (ret < 0)
5598                 trace_array_put(tr);
5599
5600         return ret;
5601 }
5602
5603 static const struct file_operations tracing_iter_fops = {
5604         .open           = tracing_trace_options_open,
5605         .read           = seq_read,
5606         .llseek         = seq_lseek,
5607         .release        = tracing_single_release_tr,
5608         .write          = tracing_trace_options_write,
5609 };
5610
5611 static const char readme_msg[] =
5612         "tracing mini-HOWTO:\n\n"
5613         "# echo 0 > tracing_on : quick way to disable tracing\n"
5614         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5615         " Important files:\n"
5616         "  trace\t\t\t- The static contents of the buffer\n"
5617         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5618         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5619         "  current_tracer\t- function and latency tracers\n"
5620         "  available_tracers\t- list of configured tracers for current_tracer\n"
5621         "  error_log\t- error log for failed commands (that support it)\n"
5622         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5623         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5624         "  trace_clock\t\t- change the clock used to order events\n"
5625         "       local:   Per cpu clock but may not be synced across CPUs\n"
5626         "      global:   Synced across CPUs but slows tracing down.\n"
5627         "     counter:   Not a clock, but just an increment\n"
5628         "      uptime:   Jiffy counter from time of boot\n"
5629         "        perf:   Same clock that perf events use\n"
5630 #ifdef CONFIG_X86_64
5631         "     x86-tsc:   TSC cycle counter\n"
5632 #endif
5633         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5634         "       delta:   Delta difference against a buffer-wide timestamp\n"
5635         "    absolute:   Absolute (standalone) timestamp\n"
5636         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5637         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5638         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5639         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5640         "\t\t\t  Remove sub-buffer with rmdir\n"
5641         "  trace_options\t\t- Set format or modify how tracing happens\n"
5642         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5643         "\t\t\t  option name\n"
5644         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5645 #ifdef CONFIG_DYNAMIC_FTRACE
5646         "\n  available_filter_functions - list of functions that can be filtered on\n"
5647         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5648         "\t\t\t  functions\n"
5649         "\t     accepts: func_full_name or glob-matching-pattern\n"
5650         "\t     modules: Can select a group via module\n"
5651         "\t      Format: :mod:<module-name>\n"
5652         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5653         "\t    triggers: a command to perform when function is hit\n"
5654         "\t      Format: <function>:<trigger>[:count]\n"
5655         "\t     trigger: traceon, traceoff\n"
5656         "\t\t      enable_event:<system>:<event>\n"
5657         "\t\t      disable_event:<system>:<event>\n"
5658 #ifdef CONFIG_STACKTRACE
5659         "\t\t      stacktrace\n"
5660 #endif
5661 #ifdef CONFIG_TRACER_SNAPSHOT
5662         "\t\t      snapshot\n"
5663 #endif
5664         "\t\t      dump\n"
5665         "\t\t      cpudump\n"
5666         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5667         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5668         "\t     The first one will disable tracing every time do_fault is hit\n"
5669         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5670         "\t       The first time do_trap is hit and it disables tracing, the\n"
5671         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5672         "\t       the counter will not decrement. It only decrements when the\n"
5673         "\t       trigger did work\n"
5674         "\t     To remove trigger without count:\n"
5675         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5676         "\t     To remove trigger with a count:\n"
5677         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5678         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5679         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5680         "\t    modules: Can select a group via module command :mod:\n"
5681         "\t    Does not accept triggers\n"
5682 #endif /* CONFIG_DYNAMIC_FTRACE */
5683 #ifdef CONFIG_FUNCTION_TRACER
5684         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5685         "\t\t    (function)\n"
5686         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5687         "\t\t    (function)\n"
5688 #endif
5689 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5690         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5691         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5692         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5693 #endif
5694 #ifdef CONFIG_TRACER_SNAPSHOT
5695         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5696         "\t\t\t  snapshot buffer. Read the contents for more\n"
5697         "\t\t\t  information\n"
5698 #endif
5699 #ifdef CONFIG_STACK_TRACER
5700         "  stack_trace\t\t- Shows the max stack trace when active\n"
5701         "  stack_max_size\t- Shows current max stack size that was traced\n"
5702         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5703         "\t\t\t  new trace)\n"
5704 #ifdef CONFIG_DYNAMIC_FTRACE
5705         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5706         "\t\t\t  traces\n"
5707 #endif
5708 #endif /* CONFIG_STACK_TRACER */
5709 #ifdef CONFIG_DYNAMIC_EVENTS
5710         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5711         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5712 #endif
5713 #ifdef CONFIG_KPROBE_EVENTS
5714         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5715         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5716 #endif
5717 #ifdef CONFIG_UPROBE_EVENTS
5718         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5719         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5720 #endif
5721 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5722     defined(CONFIG_FPROBE_EVENTS)
5723         "\t  accepts: event-definitions (one definition per line)\n"
5724 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5725         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5726         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5727 #endif
5728 #ifdef CONFIG_FPROBE_EVENTS
5729         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5730         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5731 #endif
5732 #ifdef CONFIG_HIST_TRIGGERS
5733         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5734 #endif
5735         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5736         "\t           -:[<group>/][<event>]\n"
5737 #ifdef CONFIG_KPROBE_EVENTS
5738         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5739   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5740 #endif
5741 #ifdef CONFIG_UPROBE_EVENTS
5742   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5743 #endif
5744         "\t     args: <name>=fetcharg[:type]\n"
5745         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5746 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5747 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5748         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5749         "\t           <argname>[->field[->field|.field...]],\n"
5750 #else
5751         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5752 #endif
5753 #else
5754         "\t           $stack<index>, $stack, $retval, $comm,\n"
5755 #endif
5756         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5757         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5758         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5759         "\t           symstr, <type>\\[<array-size>\\]\n"
5760 #ifdef CONFIG_HIST_TRIGGERS
5761         "\t    field: <stype> <name>;\n"
5762         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5763         "\t           [unsigned] char/int/long\n"
5764 #endif
5765         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5766         "\t            of the <attached-group>/<attached-event>.\n"
5767 #endif
5768         "  events/\t\t- Directory containing all trace event subsystems:\n"
5769         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5770         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5771         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5772         "\t\t\t  events\n"
5773         "      filter\t\t- If set, only events passing filter are traced\n"
5774         "  events/<system>/<event>/\t- Directory containing control files for\n"
5775         "\t\t\t  <event>:\n"
5776         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5777         "      filter\t\t- If set, only events passing filter are traced\n"
5778         "      trigger\t\t- If set, a command to perform when event is hit\n"
5779         "\t    Format: <trigger>[:count][if <filter>]\n"
5780         "\t   trigger: traceon, traceoff\n"
5781         "\t            enable_event:<system>:<event>\n"
5782         "\t            disable_event:<system>:<event>\n"
5783 #ifdef CONFIG_HIST_TRIGGERS
5784         "\t            enable_hist:<system>:<event>\n"
5785         "\t            disable_hist:<system>:<event>\n"
5786 #endif
5787 #ifdef CONFIG_STACKTRACE
5788         "\t\t    stacktrace\n"
5789 #endif
5790 #ifdef CONFIG_TRACER_SNAPSHOT
5791         "\t\t    snapshot\n"
5792 #endif
5793 #ifdef CONFIG_HIST_TRIGGERS
5794         "\t\t    hist (see below)\n"
5795 #endif
5796         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5797         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5798         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5799         "\t                  events/block/block_unplug/trigger\n"
5800         "\t   The first disables tracing every time block_unplug is hit.\n"
5801         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5802         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5803         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5804         "\t   Like function triggers, the counter is only decremented if it\n"
5805         "\t    enabled or disabled tracing.\n"
5806         "\t   To remove a trigger without a count:\n"
5807         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5808         "\t   To remove a trigger with a count:\n"
5809         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5810         "\t   Filters can be ignored when removing a trigger.\n"
5811 #ifdef CONFIG_HIST_TRIGGERS
5812         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5813         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5814         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5815         "\t            [:values=<field1[,field2,...]>]\n"
5816         "\t            [:sort=<field1[,field2,...]>]\n"
5817         "\t            [:size=#entries]\n"
5818         "\t            [:pause][:continue][:clear]\n"
5819         "\t            [:name=histname1]\n"
5820         "\t            [:nohitcount]\n"
5821         "\t            [:<handler>.<action>]\n"
5822         "\t            [if <filter>]\n\n"
5823         "\t    Note, special fields can be used as well:\n"
5824         "\t            common_timestamp - to record current timestamp\n"
5825         "\t            common_cpu - to record the CPU the event happened on\n"
5826         "\n"
5827         "\t    A hist trigger variable can be:\n"
5828         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5829         "\t        - a reference to another variable e.g. y=$x,\n"
5830         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5831         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5832         "\n"
5833         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5834         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5835         "\t    variable reference, field or numeric literal.\n"
5836         "\n"
5837         "\t    When a matching event is hit, an entry is added to a hash\n"
5838         "\t    table using the key(s) and value(s) named, and the value of a\n"
5839         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5840         "\t    correspond to fields in the event's format description.  Keys\n"
5841         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5842         "\t    Compound keys consisting of up to two fields can be specified\n"
5843         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5844         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5845         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5846         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5847         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5848         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5849         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5850         "\t    its histogram data will be shared with other triggers of the\n"
5851         "\t    same name, and trigger hits will update this common data.\n\n"
5852         "\t    Reading the 'hist' file for the event will dump the hash\n"
5853         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5854         "\t    triggers attached to an event, there will be a table for each\n"
5855         "\t    trigger in the output.  The table displayed for a named\n"
5856         "\t    trigger will be the same as any other instance having the\n"
5857         "\t    same name.  The default format used to display a given field\n"
5858         "\t    can be modified by appending any of the following modifiers\n"
5859         "\t    to the field name, as applicable:\n\n"
5860         "\t            .hex        display a number as a hex value\n"
5861         "\t            .sym        display an address as a symbol\n"
5862         "\t            .sym-offset display an address as a symbol and offset\n"
5863         "\t            .execname   display a common_pid as a program name\n"
5864         "\t            .syscall    display a syscall id as a syscall name\n"
5865         "\t            .log2       display log2 value rather than raw number\n"
5866         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5867         "\t            .usecs      display a common_timestamp in microseconds\n"
5868         "\t            .percent    display a number as a percentage value\n"
5869         "\t            .graph      display a bar-graph of a value\n\n"
5870         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5871         "\t    trigger or to start a hist trigger but not log any events\n"
5872         "\t    until told to do so.  'continue' can be used to start or\n"
5873         "\t    restart a paused hist trigger.\n\n"
5874         "\t    The 'clear' parameter will clear the contents of a running\n"
5875         "\t    hist trigger and leave its current paused/active state\n"
5876         "\t    unchanged.\n\n"
5877         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5878         "\t    raw hitcount in the histogram.\n\n"
5879         "\t    The enable_hist and disable_hist triggers can be used to\n"
5880         "\t    have one event conditionally start and stop another event's\n"
5881         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5882         "\t    the enable_event and disable_event triggers.\n\n"
5883         "\t    Hist trigger handlers and actions are executed whenever\n"
5884         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5885         "\t        <handler>.<action>\n\n"
5886         "\t    The available handlers are:\n\n"
5887         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5888         "\t        onmax(var)               - invoke if var exceeds current max\n"
5889         "\t        onchange(var)            - invoke action if var changes\n\n"
5890         "\t    The available actions are:\n\n"
5891         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5892         "\t        save(field,...)                      - save current event fields\n"
5893 #ifdef CONFIG_TRACER_SNAPSHOT
5894         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5895 #endif
5896 #ifdef CONFIG_SYNTH_EVENTS
5897         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5898         "\t  Write into this file to define/undefine new synthetic events.\n"
5899         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5900 #endif
5901 #endif
5902 ;
5903
5904 static ssize_t
5905 tracing_readme_read(struct file *filp, char __user *ubuf,
5906                        size_t cnt, loff_t *ppos)
5907 {
5908         return simple_read_from_buffer(ubuf, cnt, ppos,
5909                                         readme_msg, strlen(readme_msg));
5910 }
5911
5912 static const struct file_operations tracing_readme_fops = {
5913         .open           = tracing_open_generic,
5914         .read           = tracing_readme_read,
5915         .llseek         = generic_file_llseek,
5916 };
5917
5918 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5919 {
5920         int pid = ++(*pos);
5921
5922         return trace_find_tgid_ptr(pid);
5923 }
5924
5925 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5926 {
5927         int pid = *pos;
5928
5929         return trace_find_tgid_ptr(pid);
5930 }
5931
5932 static void saved_tgids_stop(struct seq_file *m, void *v)
5933 {
5934 }
5935
5936 static int saved_tgids_show(struct seq_file *m, void *v)
5937 {
5938         int *entry = (int *)v;
5939         int pid = entry - tgid_map;
5940         int tgid = *entry;
5941
5942         if (tgid == 0)
5943                 return SEQ_SKIP;
5944
5945         seq_printf(m, "%d %d\n", pid, tgid);
5946         return 0;
5947 }
5948
5949 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5950         .start          = saved_tgids_start,
5951         .stop           = saved_tgids_stop,
5952         .next           = saved_tgids_next,
5953         .show           = saved_tgids_show,
5954 };
5955
5956 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5957 {
5958         int ret;
5959
5960         ret = tracing_check_open_get_tr(NULL);
5961         if (ret)
5962                 return ret;
5963
5964         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5965 }
5966
5967
5968 static const struct file_operations tracing_saved_tgids_fops = {
5969         .open           = tracing_saved_tgids_open,
5970         .read           = seq_read,
5971         .llseek         = seq_lseek,
5972         .release        = seq_release,
5973 };
5974
5975 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5976 {
5977         unsigned int *ptr = v;
5978
5979         if (*pos || m->count)
5980                 ptr++;
5981
5982         (*pos)++;
5983
5984         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5985              ptr++) {
5986                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5987                         continue;
5988
5989                 return ptr;
5990         }
5991
5992         return NULL;
5993 }
5994
5995 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5996 {
5997         void *v;
5998         loff_t l = 0;
5999
6000         preempt_disable();
6001         arch_spin_lock(&trace_cmdline_lock);
6002
6003         v = &savedcmd->map_cmdline_to_pid[0];
6004         while (l <= *pos) {
6005                 v = saved_cmdlines_next(m, v, &l);
6006                 if (!v)
6007                         return NULL;
6008         }
6009
6010         return v;
6011 }
6012
6013 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6014 {
6015         arch_spin_unlock(&trace_cmdline_lock);
6016         preempt_enable();
6017 }
6018
6019 static int saved_cmdlines_show(struct seq_file *m, void *v)
6020 {
6021         char buf[TASK_COMM_LEN];
6022         unsigned int *pid = v;
6023
6024         __trace_find_cmdline(*pid, buf);
6025         seq_printf(m, "%d %s\n", *pid, buf);
6026         return 0;
6027 }
6028
6029 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6030         .start          = saved_cmdlines_start,
6031         .next           = saved_cmdlines_next,
6032         .stop           = saved_cmdlines_stop,
6033         .show           = saved_cmdlines_show,
6034 };
6035
6036 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6037 {
6038         int ret;
6039
6040         ret = tracing_check_open_get_tr(NULL);
6041         if (ret)
6042                 return ret;
6043
6044         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6045 }
6046
6047 static const struct file_operations tracing_saved_cmdlines_fops = {
6048         .open           = tracing_saved_cmdlines_open,
6049         .read           = seq_read,
6050         .llseek         = seq_lseek,
6051         .release        = seq_release,
6052 };
6053
6054 static ssize_t
6055 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6056                                  size_t cnt, loff_t *ppos)
6057 {
6058         char buf[64];
6059         int r;
6060
6061         preempt_disable();
6062         arch_spin_lock(&trace_cmdline_lock);
6063         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6064         arch_spin_unlock(&trace_cmdline_lock);
6065         preempt_enable();
6066
6067         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6068 }
6069
6070 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6071 {
6072         kfree(s->saved_cmdlines);
6073         kfree(s->map_cmdline_to_pid);
6074         kfree(s);
6075 }
6076
6077 static int tracing_resize_saved_cmdlines(unsigned int val)
6078 {
6079         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6080
6081         s = kmalloc(sizeof(*s), GFP_KERNEL);
6082         if (!s)
6083                 return -ENOMEM;
6084
6085         if (allocate_cmdlines_buffer(val, s) < 0) {
6086                 kfree(s);
6087                 return -ENOMEM;
6088         }
6089
6090         preempt_disable();
6091         arch_spin_lock(&trace_cmdline_lock);
6092         savedcmd_temp = savedcmd;
6093         savedcmd = s;
6094         arch_spin_unlock(&trace_cmdline_lock);
6095         preempt_enable();
6096         free_saved_cmdlines_buffer(savedcmd_temp);
6097
6098         return 0;
6099 }
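/*
 * The new buffer is swapped in under trace_cmdline_lock so lookups
 * only ever see a fully initialized savedcmd, and the old buffer is
 * freed only after the lock has been dropped.
 */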
6100
6101 static ssize_t
6102 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6103                                   size_t cnt, loff_t *ppos)
6104 {
6105         unsigned long val;
6106         int ret;
6107
6108         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6109         if (ret)
6110                 return ret;
6111
6112         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
6113         if (!val || val > PID_MAX_DEFAULT)
6114                 return -EINVAL;
6115
6116         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6117         if (ret < 0)
6118                 return ret;
6119
6120         *ppos += cnt;
6121
6122         return cnt;
6123 }
6124
6125 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6126         .open           = tracing_open_generic,
6127         .read           = tracing_saved_cmdlines_size_read,
6128         .write          = tracing_saved_cmdlines_size_write,
6129 };
6130
6131 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6132 static union trace_eval_map_item *
6133 update_eval_map(union trace_eval_map_item *ptr)
6134 {
6135         if (!ptr->map.eval_string) {
6136                 if (ptr->tail.next) {
6137                         ptr = ptr->tail.next;
6138                         /* Set ptr to the next real item (skip head) */
6139                         ptr++;
6140                 } else
6141                         return NULL;
6142         }
6143         return ptr;
6144 }
6145
6146 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6147 {
6148         union trace_eval_map_item *ptr = v;
6149
6150         /*
6151          * Paranoid! If ptr points to end, we don't want to increment past it.
6152          * This really should never happen.
6153          */
6154         (*pos)++;
6155         ptr = update_eval_map(ptr);
6156         if (WARN_ON_ONCE(!ptr))
6157                 return NULL;
6158
6159         ptr++;
6160         ptr = update_eval_map(ptr);
6161
6162         return ptr;
6163 }
6164
6165 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6166 {
6167         union trace_eval_map_item *v;
6168         loff_t l = 0;
6169
6170         mutex_lock(&trace_eval_mutex);
6171
6172         v = trace_eval_maps;
6173         if (v)
6174                 v++;
6175
6176         while (v && l < *pos) {
6177                 v = eval_map_next(m, v, &l);
6178         }
6179
6180         return v;
6181 }
6182
6183 static void eval_map_stop(struct seq_file *m, void *v)
6184 {
6185         mutex_unlock(&trace_eval_mutex);
6186 }
6187
6188 static int eval_map_show(struct seq_file *m, void *v)
6189 {
6190         union trace_eval_map_item *ptr = v;
6191
6192         seq_printf(m, "%s %ld (%s)\n",
6193                    ptr->map.eval_string, ptr->map.eval_value,
6194                    ptr->map.system);
6195
6196         return 0;
6197 }
6198
6199 static const struct seq_operations tracing_eval_map_seq_ops = {
6200         .start          = eval_map_start,
6201         .next           = eval_map_next,
6202         .stop           = eval_map_stop,
6203         .show           = eval_map_show,
6204 };
6205
6206 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6207 {
6208         int ret;
6209
6210         ret = tracing_check_open_get_tr(NULL);
6211         if (ret)
6212                 return ret;
6213
6214         return seq_open(filp, &tracing_eval_map_seq_ops);
6215 }
6216
6217 static const struct file_operations tracing_eval_map_fops = {
6218         .open           = tracing_eval_map_open,
6219         .read           = seq_read,
6220         .llseek         = seq_lseek,
6221         .release        = seq_release,
6222 };
6223
6224 static inline union trace_eval_map_item *
6225 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6226 {
6227         /* Return tail of array given the head */
6228         return ptr + ptr->head.length + 1;
6229 }
6230
6231 static void
6232 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6233                            int len)
6234 {
6235         struct trace_eval_map **stop;
6236         struct trace_eval_map **map;
6237         union trace_eval_map_item *map_array;
6238         union trace_eval_map_item *ptr;
6239
6240         stop = start + len;
6241
6242         /*
6243          * Each block on the trace_eval_maps list contains the maps plus a head
6244          * and a tail item, where the head holds the module and the length of the
6245          * array, and the tail holds a pointer to the next block.
6246          */
6247         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6248         if (!map_array) {
6249                 pr_warn("Unable to allocate trace eval mapping\n");
6250                 return;
6251         }
6252
6253         mutex_lock(&trace_eval_mutex);
6254
6255         if (!trace_eval_maps)
6256                 trace_eval_maps = map_array;
6257         else {
6258                 ptr = trace_eval_maps;
6259                 for (;;) {
6260                         ptr = trace_eval_jmp_to_tail(ptr);
6261                         if (!ptr->tail.next)
6262                                 break;
6263                         ptr = ptr->tail.next;
6264
6265                 }
6266                 ptr->tail.next = map_array;
6267         }
6268         map_array->head.mod = mod;
6269         map_array->head.length = len;
6270         map_array++;
6271
6272         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6273                 map_array->map = **map;
6274                 map_array++;
6275         }
6276         memset(map_array, 0, sizeof(*map_array));
6277
6278         mutex_unlock(&trace_eval_mutex);
6279 }
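
/*
 * Layout sketch (illustrative; names and values hypothetical) of one
 * block built by trace_insert_eval_map_file() above for a module with
 * len == 2 eval maps:
 *
 *   map_array[0]  head:  { .mod = <module>, .length = 2 }
 *   map_array[1]  map :  { "MY_STATE_A", 0, "my_system" }
 *   map_array[2]  map :  { "MY_STATE_B", 1, "my_system" }
 *   map_array[3]  tail:  zeroed; .tail.next later chains the next block
 *
 * eval_map_start()/eval_map_next() skip the head and tail items, so the
 * "eval_map" file shows only the real map entries.
 */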
6280
6281 static void trace_create_eval_file(struct dentry *d_tracer)
6282 {
6283         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6284                           NULL, &tracing_eval_map_fops);
6285 }
6286
6287 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6288 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6289 static inline void trace_insert_eval_map_file(struct module *mod,
6290                               struct trace_eval_map **start, int len) { }
6291 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6292
6293 static void trace_insert_eval_map(struct module *mod,
6294                                   struct trace_eval_map **start, int len)
6295 {
6296         struct trace_eval_map **map;
6297
6298         if (len <= 0)
6299                 return;
6300
6301         map = start;
6302
6303         trace_event_eval_update(map, len);
6304
6305         trace_insert_eval_map_file(mod, start, len);
6306 }
6307
6308 static ssize_t
6309 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6310                        size_t cnt, loff_t *ppos)
6311 {
6312         struct trace_array *tr = filp->private_data;
6313         char buf[MAX_TRACER_SIZE+2];
6314         int r;
6315
6316         mutex_lock(&trace_types_lock);
6317         r = sprintf(buf, "%s\n", tr->current_trace->name);
6318         mutex_unlock(&trace_types_lock);
6319
6320         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6321 }
6322
6323 int tracer_init(struct tracer *t, struct trace_array *tr)
6324 {
6325         tracing_reset_online_cpus(&tr->array_buffer);
6326         return t->init(tr);
6327 }
6328
6329 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6330 {
6331         int cpu;
6332
6333         for_each_tracing_cpu(cpu)
6334                 per_cpu_ptr(buf->data, cpu)->entries = val;
6335 }
6336
6337 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6338 {
6339         if (cpu == RING_BUFFER_ALL_CPUS) {
6340                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6341         } else {
6342                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6343         }
6344 }
6345
6346 #ifdef CONFIG_TRACER_MAX_TRACE
6347 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6348 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6349                                         struct array_buffer *size_buf, int cpu_id)
6350 {
6351         int cpu, ret = 0;
6352
6353         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6354                 for_each_tracing_cpu(cpu) {
6355                         ret = ring_buffer_resize(trace_buf->buffer,
6356                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6357                         if (ret < 0)
6358                                 break;
6359                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6360                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6361                 }
6362         } else {
6363                 ret = ring_buffer_resize(trace_buf->buffer,
6364                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6365                 if (ret == 0)
6366                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6367                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6368         }
6369
6370         return ret;
6371 }
6372 #endif /* CONFIG_TRACER_MAX_TRACE */
6373
6374 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6375                                         unsigned long size, int cpu)
6376 {
6377         int ret;
6378
6379         /*
6380          * If the kernel or the user changes the size of the ring buffer,
6381          * we use the size that was given, and we can forget about
6382          * expanding it later.
6383          */
6384         trace_set_ring_buffer_expanded(tr);
6385
6386         /* May be called before buffers are initialized */
6387         if (!tr->array_buffer.buffer)
6388                 return 0;
6389
6390         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6391         if (ret < 0)
6392                 return ret;
6393
6394 #ifdef CONFIG_TRACER_MAX_TRACE
6395         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6396             !tr->current_trace->use_max_tr)
6397                 goto out;
6398
6399         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6400         if (ret < 0) {
6401                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6402                                                      &tr->array_buffer, cpu);
6403                 if (r < 0) {
6404                         /*
6405                          * AARGH! We are left with different
6406                          * size max buffer!!!!
6407                          * The max buffer is our "snapshot" buffer.
6408                          * When a tracer needs a snapshot (one of the
6409                          * latency tracers), it swaps the max buffer
6410                          * with the saved snapshot. We succeeded in updating
6411                          * the size of the main buffer, but failed to
6412                          * update the size of the max buffer. But when we tried
6413                          * to reset the main buffer to the original size, we
6414                          * failed there too. This is very unlikely to
6415                          * happen, but if it does, warn and kill all
6416                          * tracing.
6417                          */
6418                         WARN_ON(1);
6419                         tracing_disabled = 1;
6420                 }
6421                 return ret;
6422         }
6423
6424         update_buffer_entries(&tr->max_buffer, cpu);
6425
6426  out:
6427 #endif /* CONFIG_TRACER_MAX_TRACE */
6428
6429         update_buffer_entries(&tr->array_buffer, cpu);
6430
6431         return ret;
6432 }
6433
6434 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6435                                   unsigned long size, int cpu_id)
6436 {
6437         int ret;
6438
6439         mutex_lock(&trace_types_lock);
6440
6441         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6442                 /* make sure this CPU is enabled in the mask */
6443                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6444                         ret = -EINVAL;
6445                         goto out;
6446                 }
6447         }
6448
6449         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6450         if (ret < 0)
6451                 ret = -ENOMEM;
6452
6453 out:
6454         mutex_unlock(&trace_types_lock);
6455
6456         return ret;
6457 }
6458
6459
6460 /**
6461  * tracing_update_buffers - used by tracing facility to expand ring buffers
6462  * @tr: The tracing instance
6463  *
6464  * To save memory when tracing is never used on a system that has it
6465  * configured in, the ring buffers are set to a minimum size. But once
6466  * a user starts to use the tracing facility, they need to grow
6467  * to their default size.
6468  *
6469  * This function is to be called when a tracer is about to be used.
6470  */
6471 int tracing_update_buffers(struct trace_array *tr)
6472 {
6473         int ret = 0;
6474
6475         mutex_lock(&trace_types_lock);
6476         if (!tr->ring_buffer_expanded)
6477                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6478                                                 RING_BUFFER_ALL_CPUS);
6479         mutex_unlock(&trace_types_lock);
6480
6481         return ret;
6482 }
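
/*
 * Kernel-side usage sketch (illustrative only; my_start_feature() and
 * its caller are hypothetical): code that is about to start emitting
 * trace data expands the ring buffer to its default size first and
 * backs out if that fails.
 */
#if 0	/* example only */
static int my_start_feature(struct trace_array *tr)
{
	int ret;

	ret = tracing_update_buffers(tr);	/* grow if still at minimum */
	if (ret < 0)
		return ret;

	/* ... safe to start writing events into tr->array_buffer ... */
	return 0;
}
#endif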
6483
6484 struct trace_option_dentry;
6485
6486 static void
6487 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6488
6489 /*
6490  * Used to clear out the tracer before deletion of an instance.
6491  * Must have trace_types_lock held.
6492  */
6493 static void tracing_set_nop(struct trace_array *tr)
6494 {
6495         if (tr->current_trace == &nop_trace)
6496                 return;
6497
6498         tr->current_trace->enabled--;
6499
6500         if (tr->current_trace->reset)
6501                 tr->current_trace->reset(tr);
6502
6503         tr->current_trace = &nop_trace;
6504 }
6505
6506 static bool tracer_options_updated;
6507
6508 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6509 {
6510         /* Only enable if the directory has been created already. */
6511         if (!tr->dir)
6512                 return;
6513
6514         /* Only create trace option files after update_tracer_options finishes */
6515         if (!tracer_options_updated)
6516                 return;
6517
6518         create_trace_option_files(tr, t);
6519 }
6520
6521 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6522 {
6523         struct tracer *t;
6524 #ifdef CONFIG_TRACER_MAX_TRACE
6525         bool had_max_tr;
6526 #endif
6527         int ret = 0;
6528
6529         mutex_lock(&trace_types_lock);
6530
6531         if (!tr->ring_buffer_expanded) {
6532                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6533                                                 RING_BUFFER_ALL_CPUS);
6534                 if (ret < 0)
6535                         goto out;
6536                 ret = 0;
6537         }
6538
6539         for (t = trace_types; t; t = t->next) {
6540                 if (strcmp(t->name, buf) == 0)
6541                         break;
6542         }
6543         if (!t) {
6544                 ret = -EINVAL;
6545                 goto out;
6546         }
6547         if (t == tr->current_trace)
6548                 goto out;
6549
6550 #ifdef CONFIG_TRACER_SNAPSHOT
6551         if (t->use_max_tr) {
6552                 local_irq_disable();
6553                 arch_spin_lock(&tr->max_lock);
6554                 if (tr->cond_snapshot)
6555                         ret = -EBUSY;
6556                 arch_spin_unlock(&tr->max_lock);
6557                 local_irq_enable();
6558                 if (ret)
6559                         goto out;
6560         }
6561 #endif
6562         /* Some tracers won't work on the kernel command line */
6563         if (system_state < SYSTEM_RUNNING && t->noboot) {
6564                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6565                         t->name);
6566                 goto out;
6567         }
6568
6569         /* Some tracers are only allowed for the top level buffer */
6570         if (!trace_ok_for_array(t, tr)) {
6571                 ret = -EINVAL;
6572                 goto out;
6573         }
6574
6575         /* If trace pipe files are being read, we can't change the tracer */
6576         if (tr->trace_ref) {
6577                 ret = -EBUSY;
6578                 goto out;
6579         }
6580
6581         trace_branch_disable();
6582
6583         tr->current_trace->enabled--;
6584
6585         if (tr->current_trace->reset)
6586                 tr->current_trace->reset(tr);
6587
6588 #ifdef CONFIG_TRACER_MAX_TRACE
6589         had_max_tr = tr->current_trace->use_max_tr;
6590
6591         /* Current trace needs to be nop_trace before synchronize_rcu */
6592         tr->current_trace = &nop_trace;
6593
6594         if (had_max_tr && !t->use_max_tr) {
6595                 /*
6596                  * We need to make sure that the update_max_tr sees that
6597                  * current_trace changed to nop_trace to keep it from
6598                  * swapping the buffers after we resize it.
6599                  * update_max_tr() is called with interrupts disabled,
6600                  * so a synchronize_rcu() is sufficient.
6601                  */
6602                 synchronize_rcu();
6603                 free_snapshot(tr);
6604         }
6605
6606         if (t->use_max_tr && !tr->allocated_snapshot) {
6607                 ret = tracing_alloc_snapshot_instance(tr);
6608                 if (ret < 0)
6609                         goto out;
6610         }
6611 #else
6612         tr->current_trace = &nop_trace;
6613 #endif
6614
6615         if (t->init) {
6616                 ret = tracer_init(t, tr);
6617                 if (ret)
6618                         goto out;
6619         }
6620
6621         tr->current_trace = t;
6622         tr->current_trace->enabled++;
6623         trace_branch_enable(tr);
6624  out:
6625         mutex_unlock(&trace_types_lock);
6626
6627         return ret;
6628 }
6629
6630 static ssize_t
6631 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6632                         size_t cnt, loff_t *ppos)
6633 {
6634         struct trace_array *tr = filp->private_data;
6635         char buf[MAX_TRACER_SIZE+1];
6636         char *name;
6637         size_t ret;
6638         int err;
6639
6640         ret = cnt;
6641
6642         if (cnt > MAX_TRACER_SIZE)
6643                 cnt = MAX_TRACER_SIZE;
6644
6645         if (copy_from_user(buf, ubuf, cnt))
6646                 return -EFAULT;
6647
6648         buf[cnt] = 0;
6649
6650         name = strim(buf);
6651
6652         err = tracing_set_tracer(tr, name);
6653         if (err)
6654                 return err;
6655
6656         *ppos += ret;
6657
6658         return ret;
6659 }
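
/*
 * Userspace sketch (illustrative, not part of this file): switching the
 * current tracer by writing its name to the current_tracer file handled
 * above.  "nop" is always registered; the tracefs path is an assumption.
 */
#if 0	/* userspace example only */
#include <stdio.h>

static int set_current_tracer(const char *name)
{
	FILE *f = fopen("/sys/kernel/tracing/current_tracer", "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", name);	/* e.g. set_current_tracer("nop") */
	return fclose(f);
}
#endif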
6660
6661 static ssize_t
6662 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6663                    size_t cnt, loff_t *ppos)
6664 {
6665         char buf[64];
6666         int r;
6667
6668         r = snprintf(buf, sizeof(buf), "%ld\n",
6669                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6670         if (r > sizeof(buf))
6671                 r = sizeof(buf);
6672         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6673 }
6674
6675 static ssize_t
6676 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6677                     size_t cnt, loff_t *ppos)
6678 {
6679         unsigned long val;
6680         int ret;
6681
6682         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6683         if (ret)
6684                 return ret;
6685
6686         *ptr = val * 1000;
6687
6688         return cnt;
6689 }
6690
6691 static ssize_t
6692 tracing_thresh_read(struct file *filp, char __user *ubuf,
6693                     size_t cnt, loff_t *ppos)
6694 {
6695         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6696 }
6697
6698 static ssize_t
6699 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6700                      size_t cnt, loff_t *ppos)
6701 {
6702         struct trace_array *tr = filp->private_data;
6703         int ret;
6704
6705         mutex_lock(&trace_types_lock);
6706         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6707         if (ret < 0)
6708                 goto out;
6709
6710         if (tr->current_trace->update_thresh) {
6711                 ret = tr->current_trace->update_thresh(tr);
6712                 if (ret < 0)
6713                         goto out;
6714         }
6715
6716         ret = cnt;
6717 out:
6718         mutex_unlock(&trace_types_lock);
6719
6720         return ret;
6721 }
6722
6723 #ifdef CONFIG_TRACER_MAX_TRACE
6724
6725 static ssize_t
6726 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6727                      size_t cnt, loff_t *ppos)
6728 {
6729         struct trace_array *tr = filp->private_data;
6730
6731         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6732 }
6733
6734 static ssize_t
6735 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6736                       size_t cnt, loff_t *ppos)
6737 {
6738         struct trace_array *tr = filp->private_data;
6739
6740         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6741 }
6742
6743 #endif
6744
6745 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6746 {
6747         if (cpu == RING_BUFFER_ALL_CPUS) {
6748                 if (cpumask_empty(tr->pipe_cpumask)) {
6749                         cpumask_setall(tr->pipe_cpumask);
6750                         return 0;
6751                 }
6752         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6753                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6754                 return 0;
6755         }
6756         return -EBUSY;
6757 }
6758
6759 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6760 {
6761         if (cpu == RING_BUFFER_ALL_CPUS) {
6762                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6763                 cpumask_clear(tr->pipe_cpumask);
6764         } else {
6765                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6766                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6767         }
6768 }
6769
6770 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6771 {
6772         struct trace_array *tr = inode->i_private;
6773         struct trace_iterator *iter;
6774         int cpu;
6775         int ret;
6776
6777         ret = tracing_check_open_get_tr(tr);
6778         if (ret)
6779                 return ret;
6780
6781         mutex_lock(&trace_types_lock);
6782         cpu = tracing_get_cpu(inode);
6783         ret = open_pipe_on_cpu(tr, cpu);
6784         if (ret)
6785                 goto fail_pipe_on_cpu;
6786
6787         /* create a buffer to store the information to pass to userspace */
6788         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6789         if (!iter) {
6790                 ret = -ENOMEM;
6791                 goto fail_alloc_iter;
6792         }
6793
6794         trace_seq_init(&iter->seq);
6795         iter->trace = tr->current_trace;
6796
6797         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6798                 ret = -ENOMEM;
6799                 goto fail;
6800         }
6801
6802         /* trace pipe does not show start of buffer */
6803         cpumask_setall(iter->started);
6804
6805         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6806                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6807
6808         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6809         if (trace_clocks[tr->clock_id].in_ns)
6810                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6811
6812         iter->tr = tr;
6813         iter->array_buffer = &tr->array_buffer;
6814         iter->cpu_file = cpu;
6815         mutex_init(&iter->mutex);
6816         filp->private_data = iter;
6817
6818         if (iter->trace->pipe_open)
6819                 iter->trace->pipe_open(iter);
6820
6821         nonseekable_open(inode, filp);
6822
6823         tr->trace_ref++;
6824
6825         mutex_unlock(&trace_types_lock);
6826         return ret;
6827
6828 fail:
6829         kfree(iter);
6830 fail_alloc_iter:
6831         close_pipe_on_cpu(tr, cpu);
6832 fail_pipe_on_cpu:
6833         __trace_array_put(tr);
6834         mutex_unlock(&trace_types_lock);
6835         return ret;
6836 }
6837
6838 static int tracing_release_pipe(struct inode *inode, struct file *file)
6839 {
6840         struct trace_iterator *iter = file->private_data;
6841         struct trace_array *tr = inode->i_private;
6842
6843         mutex_lock(&trace_types_lock);
6844
6845         tr->trace_ref--;
6846
6847         if (iter->trace->pipe_close)
6848                 iter->trace->pipe_close(iter);
6849         close_pipe_on_cpu(tr, iter->cpu_file);
6850         mutex_unlock(&trace_types_lock);
6851
6852         free_trace_iter_content(iter);
6853         kfree(iter);
6854
6855         trace_array_put(tr);
6856
6857         return 0;
6858 }
6859
6860 static __poll_t
6861 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6862 {
6863         struct trace_array *tr = iter->tr;
6864
6865         /* Iterators are static, they should be filled or empty */
6866         if (trace_buffer_iter(iter, iter->cpu_file))
6867                 return EPOLLIN | EPOLLRDNORM;
6868
6869         if (tr->trace_flags & TRACE_ITER_BLOCK)
6870                 /*
6871                  * Always select as readable when in blocking mode
6872                  */
6873                 return EPOLLIN | EPOLLRDNORM;
6874         else
6875                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6876                                              filp, poll_table, iter->tr->buffer_percent);
6877 }
6878
6879 static __poll_t
6880 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6881 {
6882         struct trace_iterator *iter = filp->private_data;
6883
6884         return trace_poll(iter, filp, poll_table);
6885 }
6886
6887 /* Must be called with iter->mutex held. */
6888 static int tracing_wait_pipe(struct file *filp)
6889 {
6890         struct trace_iterator *iter = filp->private_data;
6891         int ret;
6892
6893         while (trace_empty(iter)) {
6894
6895                 if ((filp->f_flags & O_NONBLOCK)) {
6896                         return -EAGAIN;
6897                 }
6898
6899                 /*
6900                  * We return only once we have read something and tracing
6901                  * has been disabled. We keep blocking while tracing is
6902                  * disabled if nothing has been read yet. This allows a user
6903                  * to cat this file, then enable tracing. But after we have
6904                  * read something, we give an EOF when tracing is disabled again.
6905                  *
6906                  * iter->pos will be 0 if we haven't read anything.
6907                  */
6908                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6909                         break;
6910
6911                 mutex_unlock(&iter->mutex);
6912
6913                 ret = wait_on_pipe(iter, 0);
6914
6915                 mutex_lock(&iter->mutex);
6916
6917                 if (ret)
6918                         return ret;
6919         }
6920
6921         return 1;
6922 }
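
/*
 * Illustrative userspace sketch (not part of this file): consuming
 * trace_pipe.  A blocking read() sleeps in tracing_wait_pipe() above
 * until data arrives; opening with O_NONBLOCK makes read() fail with
 * EAGAIN instead.  The tracefs mount point is an assumption.
 */
#if 0	/* userspace example only */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static void drain_trace_pipe(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

	if (fd < 0)
		return;

	/* read() consumes: entries are removed from the ring buffer */
	while ((n = read(fd, buf, sizeof(buf))) != 0) {
		if (n < 0) {
			if (errno == EINTR)
				continue;
			break;
		}
		fwrite(buf, 1, n, stdout);
	}
	close(fd);
}
#endif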
6923
6924 /*
6925  * Consumer reader.
6926  */
6927 static ssize_t
6928 tracing_read_pipe(struct file *filp, char __user *ubuf,
6929                   size_t cnt, loff_t *ppos)
6930 {
6931         struct trace_iterator *iter = filp->private_data;
6932         ssize_t sret;
6933
6934         /*
6935          * Avoid more than one consumer on a single file descriptor.
6936          * This is just a matter of trace coherency; the ring buffer itself
6937          * is protected.
6938          */
6939         mutex_lock(&iter->mutex);
6940
6941         /* return any leftover data */
6942         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6943         if (sret != -EBUSY)
6944                 goto out;
6945
6946         trace_seq_init(&iter->seq);
6947
6948         if (iter->trace->read) {
6949                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6950                 if (sret)
6951                         goto out;
6952         }
6953
6954 waitagain:
6955         sret = tracing_wait_pipe(filp);
6956         if (sret <= 0)
6957                 goto out;
6958
6959         /* stop when tracing is finished */
6960         if (trace_empty(iter)) {
6961                 sret = 0;
6962                 goto out;
6963         }
6964
6965         if (cnt >= PAGE_SIZE)
6966                 cnt = PAGE_SIZE - 1;
6967
6968         /* reset all but tr, trace, and overruns */
6969         trace_iterator_reset(iter);
6970         cpumask_clear(iter->started);
6971         trace_seq_init(&iter->seq);
6972
6973         trace_event_read_lock();
6974         trace_access_lock(iter->cpu_file);
6975         while (trace_find_next_entry_inc(iter) != NULL) {
6976                 enum print_line_t ret;
6977                 int save_len = iter->seq.seq.len;
6978
6979                 ret = print_trace_line(iter);
6980                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6981                         /*
6982                          * If one print_trace_line() fills the entire trace_seq in one shot,
6983                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6984                          * In this case, we need to consume it; otherwise, the loop will peek
6985                          * at this event again next time, resulting in an infinite loop.
6986                          */
6987                         if (save_len == 0) {
6988                                 iter->seq.full = 0;
6989                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6990                                 trace_consume(iter);
6991                                 break;
6992                         }
6993
6994                         /* In other cases, don't print partial lines */
6995                         iter->seq.seq.len = save_len;
6996                         break;
6997                 }
6998                 if (ret != TRACE_TYPE_NO_CONSUME)
6999                         trace_consume(iter);
7000
7001                 if (trace_seq_used(&iter->seq) >= cnt)
7002                         break;
7003
7004                 /*
7005                  * The full flag being set means we reached the trace_seq buffer
7006                  * size and should have left via the partial-line condition above.
7007                  * If we get here, one of the trace_seq_* functions was not used properly.
7008                  */
7009                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7010                           iter->ent->type);
7011         }
7012         trace_access_unlock(iter->cpu_file);
7013         trace_event_read_unlock();
7014
7015         /* Now copy what we have to the user */
7016         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7017         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7018                 trace_seq_init(&iter->seq);
7019
7020         /*
7021          * If there was nothing to send to user, in spite of consuming trace
7022          * entries, go back to wait for more entries.
7023          */
7024         if (sret == -EBUSY)
7025                 goto waitagain;
7026
7027 out:
7028         mutex_unlock(&iter->mutex);
7029
7030         return sret;
7031 }
7032
7033 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7034                                      unsigned int idx)
7035 {
7036         __free_page(spd->pages[idx]);
7037 }
7038
7039 static size_t
7040 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7041 {
7042         size_t count;
7043         int save_len;
7044         int ret;
7045
7046         /* Seq buffer is page-sized, exactly what we need. */
7047         for (;;) {
7048                 save_len = iter->seq.seq.len;
7049                 ret = print_trace_line(iter);
7050
7051                 if (trace_seq_has_overflowed(&iter->seq)) {
7052                         iter->seq.seq.len = save_len;
7053                         break;
7054                 }
7055
7056                 /*
7057                  * This should not be hit, because it should only
7058                  * be set if the iter->seq overflowed. But check it
7059                  * anyway to be safe.
7060                  */
7061                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7062                         iter->seq.seq.len = save_len;
7063                         break;
7064                 }
7065
7066                 count = trace_seq_used(&iter->seq) - save_len;
7067                 if (rem < count) {
7068                         rem = 0;
7069                         iter->seq.seq.len = save_len;
7070                         break;
7071                 }
7072
7073                 if (ret != TRACE_TYPE_NO_CONSUME)
7074                         trace_consume(iter);
7075                 rem -= count;
7076                 if (!trace_find_next_entry_inc(iter))   {
7077                         rem = 0;
7078                         iter->ent = NULL;
7079                         break;
7080                 }
7081         }
7082
7083         return rem;
7084 }
7085
7086 static ssize_t tracing_splice_read_pipe(struct file *filp,
7087                                         loff_t *ppos,
7088                                         struct pipe_inode_info *pipe,
7089                                         size_t len,
7090                                         unsigned int flags)
7091 {
7092         struct page *pages_def[PIPE_DEF_BUFFERS];
7093         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7094         struct trace_iterator *iter = filp->private_data;
7095         struct splice_pipe_desc spd = {
7096                 .pages          = pages_def,
7097                 .partial        = partial_def,
7098                 .nr_pages       = 0, /* This gets updated below. */
7099                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7100                 .ops            = &default_pipe_buf_ops,
7101                 .spd_release    = tracing_spd_release_pipe,
7102         };
7103         ssize_t ret;
7104         size_t rem;
7105         unsigned int i;
7106
7107         if (splice_grow_spd(pipe, &spd))
7108                 return -ENOMEM;
7109
7110         mutex_lock(&iter->mutex);
7111
7112         if (iter->trace->splice_read) {
7113                 ret = iter->trace->splice_read(iter, filp,
7114                                                ppos, pipe, len, flags);
7115                 if (ret)
7116                         goto out_err;
7117         }
7118
7119         ret = tracing_wait_pipe(filp);
7120         if (ret <= 0)
7121                 goto out_err;
7122
7123         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7124                 ret = -EFAULT;
7125                 goto out_err;
7126         }
7127
7128         trace_event_read_lock();
7129         trace_access_lock(iter->cpu_file);
7130
7131         /* Fill as many pages as possible. */
7132         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7133                 spd.pages[i] = alloc_page(GFP_KERNEL);
7134                 if (!spd.pages[i])
7135                         break;
7136
7137                 rem = tracing_fill_pipe_page(rem, iter);
7138
7139                 /* Copy the data into the page, so we can start over. */
7140                 ret = trace_seq_to_buffer(&iter->seq,
7141                                           page_address(spd.pages[i]),
7142                                           trace_seq_used(&iter->seq));
7143                 if (ret < 0) {
7144                         __free_page(spd.pages[i]);
7145                         break;
7146                 }
7147                 spd.partial[i].offset = 0;
7148                 spd.partial[i].len = trace_seq_used(&iter->seq);
7149
7150                 trace_seq_init(&iter->seq);
7151         }
7152
7153         trace_access_unlock(iter->cpu_file);
7154         trace_event_read_unlock();
7155         mutex_unlock(&iter->mutex);
7156
7157         spd.nr_pages = i;
7158
7159         if (i)
7160                 ret = splice_to_pipe(pipe, &spd);
7161         else
7162                 ret = 0;
7163 out:
7164         splice_shrink_spd(&spd);
7165         return ret;
7166
7167 out_err:
7168         mutex_unlock(&iter->mutex);
7169         goto out;
7170 }
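
/*
 * Illustrative userspace sketch (not part of this file): moving trace
 * data without a userspace copy through the splice path above.  Data is
 * spliced from trace_pipe into an anonymous pipe and from there to an
 * output descriptor.  The path and out_fd are assumptions.
 */
#if 0	/* userspace example only */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int splice_trace_once(int out_fd)
{
	int p[2], trace_fd;
	ssize_t n;

	trace_fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (trace_fd < 0)
		return -1;
	if (pipe(p) < 0) {
		close(trace_fd);
		return -1;
	}

	/* fill the pipe from trace_pipe, then flush it to out_fd */
	n = splice(trace_fd, NULL, p[1], NULL, 65536, 0);
	if (n > 0)
		n = splice(p[0], NULL, out_fd, NULL, n, 0);

	close(p[0]);
	close(p[1]);
	close(trace_fd);
	return n < 0 ? -1 : 0;
}
#endif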
7171
7172 static ssize_t
7173 tracing_entries_read(struct file *filp, char __user *ubuf,
7174                      size_t cnt, loff_t *ppos)
7175 {
7176         struct inode *inode = file_inode(filp);
7177         struct trace_array *tr = inode->i_private;
7178         int cpu = tracing_get_cpu(inode);
7179         char buf[64];
7180         int r = 0;
7181         ssize_t ret;
7182
7183         mutex_lock(&trace_types_lock);
7184
7185         if (cpu == RING_BUFFER_ALL_CPUS) {
7186                 int cpu, buf_size_same;
7187                 unsigned long size;
7188
7189                 size = 0;
7190                 buf_size_same = 1;
7191                 /* check if all cpu sizes are same */
7192                 for_each_tracing_cpu(cpu) {
7193                         /* fill in the size from first enabled cpu */
7194                         if (size == 0)
7195                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7196                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7197                                 buf_size_same = 0;
7198                                 break;
7199                         }
7200                 }
7201
7202                 if (buf_size_same) {
7203                         if (!tr->ring_buffer_expanded)
7204                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7205                                             size >> 10,
7206                                             trace_buf_size >> 10);
7207                         else
7208                                 r = sprintf(buf, "%lu\n", size >> 10);
7209                 } else
7210                         r = sprintf(buf, "X\n");
7211         } else
7212                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7213
7214         mutex_unlock(&trace_types_lock);
7215
7216         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7217         return ret;
7218 }
7219
7220 static ssize_t
7221 tracing_entries_write(struct file *filp, const char __user *ubuf,
7222                       size_t cnt, loff_t *ppos)
7223 {
7224         struct inode *inode = file_inode(filp);
7225         struct trace_array *tr = inode->i_private;
7226         unsigned long val;
7227         int ret;
7228
7229         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7230         if (ret)
7231                 return ret;
7232
7233         /* must have at least 1 entry */
7234         if (!val)
7235                 return -EINVAL;
7236
7237         /* value is in KB */
7238         val <<= 10;
7239         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7240         if (ret < 0)
7241                 return ret;
7242
7243         *ppos += cnt;
7244
7245         return cnt;
7246 }
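
/*
 * Illustrative userspace sketch (not part of this file): the value
 * written to buffer_size_kb is kilobytes per CPU (shifted left by 10
 * above).  Writing the top-level file resizes every CPU; writing
 * per_cpu/cpuN/buffer_size_kb resizes just that CPU.  Paths assumed.
 */
#if 0	/* userspace example only */
#include <stdio.h>

static int set_buffer_size_kb(int cpu, unsigned long kb)
{
	char path[128];
	FILE *f;

	if (cpu < 0)	/* all CPUs */
		snprintf(path, sizeof(path),
			 "/sys/kernel/tracing/buffer_size_kb");
	else
		snprintf(path, sizeof(path),
			 "/sys/kernel/tracing/per_cpu/cpu%d/buffer_size_kb",
			 cpu);

	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%lu\n", kb);
	return fclose(f);
}
#endif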
7247
7248 static ssize_t
7249 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7250                                 size_t cnt, loff_t *ppos)
7251 {
7252         struct trace_array *tr = filp->private_data;
7253         char buf[64];
7254         int r, cpu;
7255         unsigned long size = 0, expanded_size = 0;
7256
7257         mutex_lock(&trace_types_lock);
7258         for_each_tracing_cpu(cpu) {
7259                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7260                 if (!tr->ring_buffer_expanded)
7261                         expanded_size += trace_buf_size >> 10;
7262         }
7263         if (tr->ring_buffer_expanded)
7264                 r = sprintf(buf, "%lu\n", size);
7265         else
7266                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7267         mutex_unlock(&trace_types_lock);
7268
7269         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7270 }
7271
7272 static ssize_t
7273 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7274                           size_t cnt, loff_t *ppos)
7275 {
7276         /*
7277          * There is no need to read what the user has written; this function
7278          * exists just to make sure that "echo" into this file succeeds.
7279          */
7280
7281         *ppos += cnt;
7282
7283         return cnt;
7284 }
7285
7286 static int
7287 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7288 {
7289         struct trace_array *tr = inode->i_private;
7290
7291         /* disable tracing ? */
7292         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7293                 tracer_tracing_off(tr);
7294         /* resize the ring buffer to 0 */
7295         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7296
7297         trace_array_put(tr);
7298
7299         return 0;
7300 }
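
/*
 * Illustrative userspace sketch (not part of this file): free_buffer
 * accepts any write, but the real work happens on release above; closing
 * the descriptor resizes the ring buffer down to its minimum (and stops
 * tracing if the stop-on-free option is set).  Path assumed.
 */
#if 0	/* userspace example only */
#include <fcntl.h>
#include <unistd.h>

static int free_trace_buffers(void)
{
	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);

	if (fd < 0)
		return -1;
	(void)write(fd, "1", 1);	/* content is ignored */
	return close(fd);		/* buffers shrink here */
}
#endif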
7301
7302 static ssize_t
7303 tracing_mark_write(struct file *filp, const char __user *ubuf,
7304                                         size_t cnt, loff_t *fpos)
7305 {
7306         struct trace_array *tr = filp->private_data;
7307         struct ring_buffer_event *event;
7308         enum event_trigger_type tt = ETT_NONE;
7309         struct trace_buffer *buffer;
7310         struct print_entry *entry;
7311         ssize_t written;
7312         int size;
7313         int len;
7314
7315 /* Used in tracing_mark_raw_write() as well */
7316 #define FAULTED_STR "<faulted>"
7317 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7318
7319         if (tracing_disabled)
7320                 return -EINVAL;
7321
7322         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7323                 return -EINVAL;
7324
7325         if (cnt > TRACE_BUF_SIZE)
7326                 cnt = TRACE_BUF_SIZE;
7327
7328         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7329
7330         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7331
7332         /* If less than "<faulted>", then make sure we can still add that */
7333         if (cnt < FAULTED_SIZE)
7334                 size += FAULTED_SIZE - cnt;
7335
7336         buffer = tr->array_buffer.buffer;
7337         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7338                                             tracing_gen_ctx());
7339         if (unlikely(!event))
7340                 /* Ring buffer disabled, return as if not open for write */
7341                 return -EBADF;
7342
7343         entry = ring_buffer_event_data(event);
7344         entry->ip = _THIS_IP_;
7345
7346         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7347         if (len) {
7348                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7349                 cnt = FAULTED_SIZE;
7350                 written = -EFAULT;
7351         } else
7352                 written = cnt;
7353
7354         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7355                 /* do not add \n before testing triggers, but add \0 */
7356                 entry->buf[cnt] = '\0';
7357                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7358         }
7359
7360         if (entry->buf[cnt - 1] != '\n') {
7361                 entry->buf[cnt] = '\n';
7362                 entry->buf[cnt + 1] = '\0';
7363         } else
7364                 entry->buf[cnt] = '\0';
7365
7366         if (static_branch_unlikely(&trace_marker_exports_enabled))
7367                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7368         __buffer_unlock_commit(buffer, event);
7369
7370         if (tt)
7371                 event_triggers_post_call(tr->trace_marker_file, tt);
7372
7373         return written;
7374 }
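
/*
 * Userspace sketch (illustrative, not part of this file): annotating the
 * trace through the trace_marker file handled above.  Each write becomes
 * one TRACE_PRINT event (truncated to the buffer limit); a trailing
 * newline is added if missing.  The tracefs path is an assumption.
 */
#if 0	/* userspace example only */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int trace_marker_fd = -1;

static void trace_mark(const char *msg)
{
	if (trace_marker_fd < 0)
		trace_marker_fd = open("/sys/kernel/tracing/trace_marker",
				       O_WRONLY);
	if (trace_marker_fd >= 0)
		(void)write(trace_marker_fd, msg, strlen(msg));
}
#endif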
7375
7376 /* Limit it for now to 3K (including tag) */
7377 #define RAW_DATA_MAX_SIZE (1024*3)
7378
7379 static ssize_t
7380 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7381                                         size_t cnt, loff_t *fpos)
7382 {
7383         struct trace_array *tr = filp->private_data;
7384         struct ring_buffer_event *event;
7385         struct trace_buffer *buffer;
7386         struct raw_data_entry *entry;
7387         ssize_t written;
7388         int size;
7389         int len;
7390
7391 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7392
7393         if (tracing_disabled)
7394                 return -EINVAL;
7395
7396         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7397                 return -EINVAL;
7398
7399         /* The marker must at least have a tag id */
7400         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7401                 return -EINVAL;
7402
7403         if (cnt > TRACE_BUF_SIZE)
7404                 cnt = TRACE_BUF_SIZE;
7405
7406         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7407
7408         size = sizeof(*entry) + cnt;
7409         if (cnt < FAULT_SIZE_ID)
7410                 size += FAULT_SIZE_ID - cnt;
7411
7412         buffer = tr->array_buffer.buffer;
7413         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7414                                             tracing_gen_ctx());
7415         if (!event)
7416                 /* Ring buffer disabled, return as if not open for write */
7417                 return -EBADF;
7418
7419         entry = ring_buffer_event_data(event);
7420
7421         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7422         if (len) {
7423                 entry->id = -1;
7424                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7425                 written = -EFAULT;
7426         } else
7427                 written = cnt;
7428
7429         __buffer_unlock_commit(buffer, event);
7430
7431         return written;
7432 }
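
/*
 * Illustrative userspace sketch (not part of this file): trace_marker_raw
 * expects a binary payload whose first sizeof(int) bytes are a tag id,
 * followed by raw data, all in a single write().  The struct layout here
 * is the writer's own convention; only the leading id is interpreted.
 */
#if 0	/* userspace example only */
#include <fcntl.h>
#include <unistd.h>

struct my_raw_marker {
	int		id;		/* tag consumed by the kernel */
	unsigned long	payload[2];	/* opaque data for post-processing */
};

/* fd comes from open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY) */
static int trace_mark_raw(int fd, int id, unsigned long a, unsigned long b)
{
	struct my_raw_marker m = { .id = id, .payload = { a, b } };

	return write(fd, &m, sizeof(m)) == sizeof(m) ? 0 : -1;
}
#endif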
7433
7434 static int tracing_clock_show(struct seq_file *m, void *v)
7435 {
7436         struct trace_array *tr = m->private;
7437         int i;
7438
7439         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7440                 seq_printf(m,
7441                         "%s%s%s%s", i ? " " : "",
7442                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7443                         i == tr->clock_id ? "]" : "");
7444         seq_putc(m, '\n');
7445
7446         return 0;
7447 }
7448
7449 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7450 {
7451         int i;
7452
7453         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7454                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7455                         break;
7456         }
7457         if (i == ARRAY_SIZE(trace_clocks))
7458                 return -EINVAL;
7459
7460         mutex_lock(&trace_types_lock);
7461
7462         tr->clock_id = i;
7463
7464         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7465
7466         /*
7467          * New clock may not be consistent with the previous clock.
7468          * Reset the buffer so that it doesn't have incomparable timestamps.
7469          */
7470         tracing_reset_online_cpus(&tr->array_buffer);
7471
7472 #ifdef CONFIG_TRACER_MAX_TRACE
7473         if (tr->max_buffer.buffer)
7474                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7475         tracing_reset_online_cpus(&tr->max_buffer);
7476 #endif
7477
7478         mutex_unlock(&trace_types_lock);
7479
7480         return 0;
7481 }
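
/*
 * Userspace sketch (illustrative, not part of this file): the trace_clock
 * file lists every clock with the active one in brackets, e.g.
 * "[local] global counter ...".  Writing a listed name switches clocks
 * and, as noted above, resets the buffers.  The path is an assumption.
 */
#if 0	/* userspace example only */
#include <stdio.h>

static int set_trace_clock(const char *name)
{
	FILE *f = fopen("/sys/kernel/tracing/trace_clock", "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", name);	/* e.g. "global" or "mono" */
	return fclose(f);
}
#endif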
7482
7483 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7484                                    size_t cnt, loff_t *fpos)
7485 {
7486         struct seq_file *m = filp->private_data;
7487         struct trace_array *tr = m->private;
7488         char buf[64];
7489         const char *clockstr;
7490         int ret;
7491
7492         if (cnt >= sizeof(buf))
7493                 return -EINVAL;
7494
7495         if (copy_from_user(buf, ubuf, cnt))
7496                 return -EFAULT;
7497
7498         buf[cnt] = 0;
7499
7500         clockstr = strstrip(buf);
7501
7502         ret = tracing_set_clock(tr, clockstr);
7503         if (ret)
7504                 return ret;
7505
7506         *fpos += cnt;
7507
7508         return cnt;
7509 }
7510
7511 static int tracing_clock_open(struct inode *inode, struct file *file)
7512 {
7513         struct trace_array *tr = inode->i_private;
7514         int ret;
7515
7516         ret = tracing_check_open_get_tr(tr);
7517         if (ret)
7518                 return ret;
7519
7520         ret = single_open(file, tracing_clock_show, inode->i_private);
7521         if (ret < 0)
7522                 trace_array_put(tr);
7523
7524         return ret;
7525 }
7526
7527 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7528 {
7529         struct trace_array *tr = m->private;
7530
7531         mutex_lock(&trace_types_lock);
7532
7533         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7534                 seq_puts(m, "delta [absolute]\n");
7535         else
7536                 seq_puts(m, "[delta] absolute\n");
7537
7538         mutex_unlock(&trace_types_lock);
7539
7540         return 0;
7541 }
7542
7543 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7544 {
7545         struct trace_array *tr = inode->i_private;
7546         int ret;
7547
7548         ret = tracing_check_open_get_tr(tr);
7549         if (ret)
7550                 return ret;
7551
7552         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7553         if (ret < 0)
7554                 trace_array_put(tr);
7555
7556         return ret;
7557 }
7558
7559 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7560 {
7561         if (rbe == this_cpu_read(trace_buffered_event))
7562                 return ring_buffer_time_stamp(buffer);
7563
7564         return ring_buffer_event_time_stamp(buffer, rbe);
7565 }
7566
7567 /*
7568  * Set or disable using the per-CPU trace_buffered_event when possible.
7569  */
7570 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7571 {
7572         int ret = 0;
7573
7574         mutex_lock(&trace_types_lock);
7575
7576         if (set && tr->no_filter_buffering_ref++)
7577                 goto out;
7578
7579         if (!set) {
7580                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7581                         ret = -EINVAL;
7582                         goto out;
7583                 }
7584
7585                 --tr->no_filter_buffering_ref;
7586         }
7587  out:
7588         mutex_unlock(&trace_types_lock);
7589
7590         return ret;
7591 }
7592
7593 struct ftrace_buffer_info {
7594         struct trace_iterator   iter;
7595         void                    *spare;
7596         unsigned int            spare_cpu;
7597         unsigned int            read;
7598 };
7599
7600 #ifdef CONFIG_TRACER_SNAPSHOT
7601 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7602 {
7603         struct trace_array *tr = inode->i_private;
7604         struct trace_iterator *iter;
7605         struct seq_file *m;
7606         int ret;
7607
7608         ret = tracing_check_open_get_tr(tr);
7609         if (ret)
7610                 return ret;
7611
7612         if (file->f_mode & FMODE_READ) {
7613                 iter = __tracing_open(inode, file, true);
7614                 if (IS_ERR(iter))
7615                         ret = PTR_ERR(iter);
7616         } else {
7617                 /* Writes still need the seq_file to hold the private data */
7618                 ret = -ENOMEM;
7619                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7620                 if (!m)
7621                         goto out;
7622                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7623                 if (!iter) {
7624                         kfree(m);
7625                         goto out;
7626                 }
7627                 ret = 0;
7628
7629                 iter->tr = tr;
7630                 iter->array_buffer = &tr->max_buffer;
7631                 iter->cpu_file = tracing_get_cpu(inode);
7632                 m->private = iter;
7633                 file->private_data = m;
7634         }
7635 out:
7636         if (ret < 0)
7637                 trace_array_put(tr);
7638
7639         return ret;
7640 }
7641
7642 static void tracing_swap_cpu_buffer(void *tr)
7643 {
7644         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7645 }
7646
7647 static ssize_t
7648 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7649                        loff_t *ppos)
7650 {
7651         struct seq_file *m = filp->private_data;
7652         struct trace_iterator *iter = m->private;
7653         struct trace_array *tr = iter->tr;
7654         unsigned long val;
7655         int ret;
7656
7657         ret = tracing_update_buffers(tr);
7658         if (ret < 0)
7659                 return ret;
7660
7661         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7662         if (ret)
7663                 return ret;
7664
7665         mutex_lock(&trace_types_lock);
7666
7667         if (tr->current_trace->use_max_tr) {
7668                 ret = -EBUSY;
7669                 goto out;
7670         }
7671
7672         local_irq_disable();
7673         arch_spin_lock(&tr->max_lock);
7674         if (tr->cond_snapshot)
7675                 ret = -EBUSY;
7676         arch_spin_unlock(&tr->max_lock);
7677         local_irq_enable();
7678         if (ret)
7679                 goto out;
7680
7681         switch (val) {
7682         case 0:
7683                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7684                         ret = -EINVAL;
7685                         break;
7686                 }
7687                 if (tr->allocated_snapshot)
7688                         free_snapshot(tr);
7689                 break;
7690         case 1:
7691 /* Only allow per-cpu swap if the ring buffer supports it */
7692 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7693                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7694                         ret = -EINVAL;
7695                         break;
7696                 }
7697 #endif
7698                 if (tr->allocated_snapshot)
7699                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7700                                         &tr->array_buffer, iter->cpu_file);
7701                 else
7702                         ret = tracing_alloc_snapshot_instance(tr);
7703                 if (ret < 0)
7704                         break;
7705                 /* Now, we're going to swap */
7706                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7707                         local_irq_disable();
7708                         update_max_tr(tr, current, smp_processor_id(), NULL);
7709                         local_irq_enable();
7710                 } else {
7711                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7712                                                  (void *)tr, 1);
7713                 }
7714                 break;
7715         default:
7716                 if (tr->allocated_snapshot) {
7717                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7718                                 tracing_reset_online_cpus(&tr->max_buffer);
7719                         else
7720                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7721                 }
7722                 break;
7723         }
7724
7725         if (ret >= 0) {
7726                 *ppos += cnt;
7727                 ret = cnt;
7728         }
7729 out:
7730         mutex_unlock(&trace_types_lock);
7731         return ret;
7732 }
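
/*
 * Illustrative userspace sketch (not part of this file) of the values
 * accepted above: writing "0" frees the snapshot buffer, "1" allocates
 * it if needed and swaps it with the live buffer, and any higher value
 * just clears the snapshot contents.  Reading the snapshot file then
 * shows the frozen data.  The tracefs path is an assumption.
 */
#if 0	/* userspace example only */
#include <stdio.h>

static int take_snapshot(void)
{
	FILE *f = fopen("/sys/kernel/tracing/snapshot", "w");

	if (!f)
		return -1;
	fputs("1\n", f);	/* allocate (if needed) and swap buffers */
	return fclose(f);
}
#endif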
7733
7734 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7735 {
7736         struct seq_file *m = file->private_data;
7737         int ret;
7738
7739         ret = tracing_release(inode, file);
7740
7741         if (file->f_mode & FMODE_READ)
7742                 return ret;
7743
7744         /* If write only, the seq_file is just a stub */
7745         if (m)
7746                 kfree(m->private);
7747         kfree(m);
7748
7749         return 0;
7750 }
7751
7752 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7753 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7754                                     size_t count, loff_t *ppos);
7755 static int tracing_buffers_release(struct inode *inode, struct file *file);
7756 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7757                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7758
7759 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7760 {
7761         struct ftrace_buffer_info *info;
7762         int ret;
7763
7764         /* The following checks for tracefs lockdown */
7765         ret = tracing_buffers_open(inode, filp);
7766         if (ret < 0)
7767                 return ret;
7768
7769         info = filp->private_data;
7770
7771         if (info->iter.trace->use_max_tr) {
7772                 tracing_buffers_release(inode, filp);
7773                 return -EBUSY;
7774         }
7775
7776         info->iter.snapshot = true;
7777         info->iter.array_buffer = &info->iter.tr->max_buffer;
7778
7779         return ret;
7780 }
7781
7782 #endif /* CONFIG_TRACER_SNAPSHOT */
7783
7784
7785 static const struct file_operations tracing_thresh_fops = {
7786         .open           = tracing_open_generic,
7787         .read           = tracing_thresh_read,
7788         .write          = tracing_thresh_write,
7789         .llseek         = generic_file_llseek,
7790 };
7791
7792 #ifdef CONFIG_TRACER_MAX_TRACE
7793 static const struct file_operations tracing_max_lat_fops = {
7794         .open           = tracing_open_generic_tr,
7795         .read           = tracing_max_lat_read,
7796         .write          = tracing_max_lat_write,
7797         .llseek         = generic_file_llseek,
7798         .release        = tracing_release_generic_tr,
7799 };
7800 #endif
7801
7802 static const struct file_operations set_tracer_fops = {
7803         .open           = tracing_open_generic_tr,
7804         .read           = tracing_set_trace_read,
7805         .write          = tracing_set_trace_write,
7806         .llseek         = generic_file_llseek,
7807         .release        = tracing_release_generic_tr,
7808 };
7809
7810 static const struct file_operations tracing_pipe_fops = {
7811         .open           = tracing_open_pipe,
7812         .poll           = tracing_poll_pipe,
7813         .read           = tracing_read_pipe,
7814         .splice_read    = tracing_splice_read_pipe,
7815         .release        = tracing_release_pipe,
7816         .llseek         = no_llseek,
7817 };
7818
7819 static const struct file_operations tracing_entries_fops = {
7820         .open           = tracing_open_generic_tr,
7821         .read           = tracing_entries_read,
7822         .write          = tracing_entries_write,
7823         .llseek         = generic_file_llseek,
7824         .release        = tracing_release_generic_tr,
7825 };
7826
7827 static const struct file_operations tracing_total_entries_fops = {
7828         .open           = tracing_open_generic_tr,
7829         .read           = tracing_total_entries_read,
7830         .llseek         = generic_file_llseek,
7831         .release        = tracing_release_generic_tr,
7832 };
7833
7834 static const struct file_operations tracing_free_buffer_fops = {
7835         .open           = tracing_open_generic_tr,
7836         .write          = tracing_free_buffer_write,
7837         .release        = tracing_free_buffer_release,
7838 };
7839
7840 static const struct file_operations tracing_mark_fops = {
7841         .open           = tracing_mark_open,
7842         .write          = tracing_mark_write,
7843         .release        = tracing_release_generic_tr,
7844 };
7845
7846 static const struct file_operations tracing_mark_raw_fops = {
7847         .open           = tracing_mark_open,
7848         .write          = tracing_mark_raw_write,
7849         .release        = tracing_release_generic_tr,
7850 };
7851
7852 static const struct file_operations trace_clock_fops = {
7853         .open           = tracing_clock_open,
7854         .read           = seq_read,
7855         .llseek         = seq_lseek,
7856         .release        = tracing_single_release_tr,
7857         .write          = tracing_clock_write,
7858 };
7859
7860 static const struct file_operations trace_time_stamp_mode_fops = {
7861         .open           = tracing_time_stamp_mode_open,
7862         .read           = seq_read,
7863         .llseek         = seq_lseek,
7864         .release        = tracing_single_release_tr,
7865 };
7866
7867 #ifdef CONFIG_TRACER_SNAPSHOT
7868 static const struct file_operations snapshot_fops = {
7869         .open           = tracing_snapshot_open,
7870         .read           = seq_read,
7871         .write          = tracing_snapshot_write,
7872         .llseek         = tracing_lseek,
7873         .release        = tracing_snapshot_release,
7874 };
7875
7876 static const struct file_operations snapshot_raw_fops = {
7877         .open           = snapshot_raw_open,
7878         .read           = tracing_buffers_read,
7879         .release        = tracing_buffers_release,
7880         .splice_read    = tracing_buffers_splice_read,
7881         .llseek         = no_llseek,
7882 };
7883
7884 #endif /* CONFIG_TRACER_SNAPSHOT */
7885
7886 /*
7887  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7888  * @filp: The active open file structure
7889  * @ubuf: The userspace provided buffer containing the value to write
7890  * @cnt: The number of bytes to write from @ubuf
7891  * @ppos: The current "file" position
7892  *
7893  * This function implements the write interface for a struct trace_min_max_param.
7894  * The filp->private_data must point to a trace_min_max_param structure that
7895  * defines where to write the value, the min and the max acceptable values,
7896  * and a lock to protect the write.
7897  */
7898 static ssize_t
7899 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7900 {
7901         struct trace_min_max_param *param = filp->private_data;
7902         u64 val;
7903         int err;
7904
7905         if (!param)
7906                 return -EFAULT;
7907
7908         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7909         if (err)
7910                 return err;
7911
7912         if (param->lock)
7913                 mutex_lock(param->lock);
7914
7915         if (param->min && val < *param->min)
7916                 err = -EINVAL;
7917
7918         if (param->max && val > *param->max)
7919                 err = -EINVAL;
7920
7921         if (!err)
7922                 *param->val = val;
7923
7924         if (param->lock)
7925                 mutex_unlock(param->lock);
7926
7927         if (err)
7928                 return err;
7929
7930         return cnt;
7931 }
7932
7933 /*
7934  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7935  * @filp: The active open file structure
7936  * @ubuf: The userspace provided buffer to read value into
7937  * @cnt: The maximum number of bytes to read
7938  * @ppos: The current "file" position
7939  *
7940  * This function implements the read interface for a struct trace_min_max_param.
7941  * The filp->private_data must point to a trace_min_max_param struct with valid
7942  * data.
7943  */
7944 static ssize_t
7945 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7946 {
7947         struct trace_min_max_param *param = filp->private_data;
7948         char buf[U64_STR_SIZE];
7949         int len;
7950         u64 val;
7951
7952         if (!param)
7953                 return -EFAULT;
7954
7955         val = *param->val;
7956
7957         if (cnt > sizeof(buf))
7958                 cnt = sizeof(buf);
7959
7960         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7961
7962         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7963 }
7964
7965 const struct file_operations trace_min_max_fops = {
7966         .open           = tracing_open_generic,
7967         .read           = trace_min_max_read,
7968         .write          = trace_min_max_write,
7969 };
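
/*
 * Example usage (a minimal sketch; the "my_*" names and the file name
 * are hypothetical): a tracer can expose a range-checked u64 knob by
 * pointing a trace_min_max_param at its value and limits and handing it
 * to trace_create_file() together with trace_min_max_fops:
 *
 *	static u64 my_period_us = 1000;
 *	static u64 my_period_min = 100;
 *	static u64 my_period_max = 1000000;
 *	static DEFINE_MUTEX(my_period_lock);
 *
 *	static struct trace_min_max_param my_period_param = {
 *		.lock	= &my_period_lock,
 *		.val	= &my_period_us,
 *		.min	= &my_period_min,
 *		.max	= &my_period_max,
 *	};
 *
 *	trace_create_file("my_period_us", TRACE_MODE_WRITE, parent,
 *			  &my_period_param, &trace_min_max_fops);
 *
 * Reads then return the current value, and writes outside the
 * [*min, *max] range are rejected with -EINVAL.
 */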
7970
7971 #define TRACING_LOG_ERRS_MAX    8
7972 #define TRACING_LOG_LOC_MAX     128
7973
7974 #define CMD_PREFIX "  Command: "
7975
7976 struct err_info {
7977         const char      **errs; /* ptr to loc-specific array of err strings */
7978         u8              type;   /* index into errs -> specific err string */
7979         u16             pos;    /* caret position */
7980         u64             ts;
7981 };
7982
7983 struct tracing_log_err {
7984         struct list_head        list;
7985         struct err_info         info;
7986         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7987         char                    *cmd;                     /* what caused err */
7988 };
7989
7990 static DEFINE_MUTEX(tracing_err_log_lock);
7991
7992 static struct tracing_log_err *alloc_tracing_log_err(int len)
7993 {
7994         struct tracing_log_err *err;
7995
7996         err = kzalloc(sizeof(*err), GFP_KERNEL);
7997         if (!err)
7998                 return ERR_PTR(-ENOMEM);
7999
8000         err->cmd = kzalloc(len, GFP_KERNEL);
8001         if (!err->cmd) {
8002                 kfree(err);
8003                 return ERR_PTR(-ENOMEM);
8004         }
8005
8006         return err;
8007 }
8008
8009 static void free_tracing_log_err(struct tracing_log_err *err)
8010 {
8011         kfree(err->cmd);
8012         kfree(err);
8013 }
8014
8015 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8016                                                    int len)
8017 {
8018         struct tracing_log_err *err;
8019         char *cmd;
8020
8021         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8022                 err = alloc_tracing_log_err(len);
8023                 if (PTR_ERR(err) != -ENOMEM)
8024                         tr->n_err_log_entries++;
8025
8026                 return err;
8027         }
8028         cmd = kzalloc(len, GFP_KERNEL);
8029         if (!cmd)
8030                 return ERR_PTR(-ENOMEM);
8031         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8032         kfree(err->cmd);
8033         err->cmd = cmd;
8034         list_del(&err->list);
8035
8036         return err;
8037 }
8038
8039 /**
8040  * err_pos - find the position of a string within a command for error careting
8041  * @cmd: The tracing command that caused the error
8042  * @str: The string to position the caret at within @cmd
8043  *
8044  * Finds the position of the first occurrence of @str within @cmd.  The
8045  * return value can be passed to tracing_log_err() for caret placement
8046  * within @cmd.
8047  *
8048  * Returns the index within @cmd of the first occurrence of @str or 0
8049  * if @str was not found.
8050  */
8051 unsigned int err_pos(char *cmd, const char *str)
8052 {
8053         char *found;
8054
8055         if (WARN_ON(!strlen(cmd)))
8056                 return 0;
8057
8058         found = strstr(cmd, str);
8059         if (found)
8060                 return found - cmd;
8061
8062         return 0;
8063 }
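
/*
 * For example (a sketch with a hypothetical command string): given
 * cmd = "hist:keys=bogus_field", err_pos(cmd, "bogus_field") returns 10,
 * which tracing_log_err() below can use to place the caret under the
 * offending token.
 */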
8064
8065 /**
8066  * tracing_log_err - write an error to the tracing error log
8067  * @tr: The associated trace array for the error (NULL for top level array)
8068  * @loc: A string describing where the error occurred
8069  * @cmd: The tracing command that caused the error
8070  * @errs: The array of loc-specific static error strings
8071  * @type: The index into errs[], which produces the specific static err string
8072  * @pos: The position the caret should be placed in the cmd
8073  *
8074  * Writes an error into tracing/error_log of the form:
8075  *
8076  * <loc>: error: <text>
8077  *   Command: <cmd>
8078  *              ^
8079  *
8080  * tracing/error_log is a small log file containing the last
8081  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8082  * unless there has been a tracing error, and the error log can be
8083  * cleared and have its memory freed by writing the empty string in
8084  * truncation mode to it, i.e. echo > tracing/error_log.
8085  *
8086  * NOTE: the @errs array along with the @type param are used to
8087  * produce a static error string - this string is not copied and saved
8088  * when the error is logged - only a pointer to it is saved.  See
8089  * existing callers for examples of how static strings are typically
8090  * defined for use with tracing_log_err().
8091  */
8092 void tracing_log_err(struct trace_array *tr,
8093                      const char *loc, const char *cmd,
8094                      const char **errs, u8 type, u16 pos)
8095 {
8096         struct tracing_log_err *err;
8097         int len = 0;
8098
8099         if (!tr)
8100                 tr = &global_trace;
8101
8102         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8103
8104         mutex_lock(&tracing_err_log_lock);
8105         err = get_tracing_log_err(tr, len);
8106         if (PTR_ERR(err) == -ENOMEM) {
8107                 mutex_unlock(&tracing_err_log_lock);
8108                 return;
8109         }
8110
8111         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8112         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8113
8114         err->info.errs = errs;
8115         err->info.type = type;
8116         err->info.pos = pos;
8117         err->info.ts = local_clock();
8118
8119         list_add_tail(&err->list, &tr->err_log);
8120         mutex_unlock(&tracing_err_log_lock);
8121 }
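
/*
 * Example usage (a minimal sketch; the error array, location string and
 * command are hypothetical but follow the pattern of existing callers):
 *
 *	static const char *my_cmd_errs[] = {
 *		"Field not found",
 *		"Too many arguments",
 *	};
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs, 0,
 *			err_pos(cmd, "bogus_field"));
 *
 * appends "my_cmd: error: Field not found" to tracing/error_log,
 * followed by the command line with the caret placed under
 * "bogus_field".
 */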
8122
8123 static void clear_tracing_err_log(struct trace_array *tr)
8124 {
8125         struct tracing_log_err *err, *next;
8126
8127         mutex_lock(&tracing_err_log_lock);
8128         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8129                 list_del(&err->list);
8130                 free_tracing_log_err(err);
8131         }
8132
8133         tr->n_err_log_entries = 0;
8134         mutex_unlock(&tracing_err_log_lock);
8135 }
8136
8137 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8138 {
8139         struct trace_array *tr = m->private;
8140
8141         mutex_lock(&tracing_err_log_lock);
8142
8143         return seq_list_start(&tr->err_log, *pos);
8144 }
8145
8146 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8147 {
8148         struct trace_array *tr = m->private;
8149
8150         return seq_list_next(v, &tr->err_log, pos);
8151 }
8152
8153 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8154 {
8155         mutex_unlock(&tracing_err_log_lock);
8156 }
8157
8158 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8159 {
8160         u16 i;
8161
8162         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8163                 seq_putc(m, ' ');
8164         for (i = 0; i < pos; i++)
8165                 seq_putc(m, ' ');
8166         seq_puts(m, "^\n");
8167 }
8168
8169 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8170 {
8171         struct tracing_log_err *err = v;
8172
8173         if (err) {
8174                 const char *err_text = err->info.errs[err->info.type];
8175                 u64 sec = err->info.ts;
8176                 u32 nsec;
8177
8178                 nsec = do_div(sec, NSEC_PER_SEC);
8179                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8180                            err->loc, err_text);
8181                 seq_printf(m, "%s", err->cmd);
8182                 tracing_err_log_show_pos(m, err->info.pos);
8183         }
8184
8185         return 0;
8186 }
8187
8188 static const struct seq_operations tracing_err_log_seq_ops = {
8189         .start  = tracing_err_log_seq_start,
8190         .next   = tracing_err_log_seq_next,
8191         .stop   = tracing_err_log_seq_stop,
8192         .show   = tracing_err_log_seq_show
8193 };
8194
8195 static int tracing_err_log_open(struct inode *inode, struct file *file)
8196 {
8197         struct trace_array *tr = inode->i_private;
8198         int ret = 0;
8199
8200         ret = tracing_check_open_get_tr(tr);
8201         if (ret)
8202                 return ret;
8203
8204         /* If this file was opened for write, then erase contents */
8205         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8206                 clear_tracing_err_log(tr);
8207
8208         if (file->f_mode & FMODE_READ) {
8209                 ret = seq_open(file, &tracing_err_log_seq_ops);
8210                 if (!ret) {
8211                         struct seq_file *m = file->private_data;
8212                         m->private = tr;
8213                 } else {
8214                         trace_array_put(tr);
8215                 }
8216         }
8217         return ret;
8218 }
8219
8220 static ssize_t tracing_err_log_write(struct file *file,
8221                                      const char __user *buffer,
8222                                      size_t count, loff_t *ppos)
8223 {
8224         return count;
8225 }
8226
8227 static int tracing_err_log_release(struct inode *inode, struct file *file)
8228 {
8229         struct trace_array *tr = inode->i_private;
8230
8231         trace_array_put(tr);
8232
8233         if (file->f_mode & FMODE_READ)
8234                 seq_release(inode, file);
8235
8236         return 0;
8237 }
8238
8239 static const struct file_operations tracing_err_log_fops = {
8240         .open           = tracing_err_log_open,
8241         .write          = tracing_err_log_write,
8242         .read           = seq_read,
8243         .llseek         = tracing_lseek,
8244         .release        = tracing_err_log_release,
8245 };
8246
8247 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8248 {
8249         struct trace_array *tr = inode->i_private;
8250         struct ftrace_buffer_info *info;
8251         int ret;
8252
8253         ret = tracing_check_open_get_tr(tr);
8254         if (ret)
8255                 return ret;
8256
8257         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8258         if (!info) {
8259                 trace_array_put(tr);
8260                 return -ENOMEM;
8261         }
8262
8263         mutex_lock(&trace_types_lock);
8264
8265         info->iter.tr           = tr;
8266         info->iter.cpu_file     = tracing_get_cpu(inode);
8267         info->iter.trace        = tr->current_trace;
8268         info->iter.array_buffer = &tr->array_buffer;
8269         info->spare             = NULL;
8270         /* Force reading ring buffer for first read */
8271         info->read              = (unsigned int)-1;
8272
8273         filp->private_data = info;
8274
8275         tr->trace_ref++;
8276
8277         mutex_unlock(&trace_types_lock);
8278
8279         ret = nonseekable_open(inode, filp);
8280         if (ret < 0)
8281                 trace_array_put(tr);
8282
8283         return ret;
8284 }
8285
8286 static __poll_t
8287 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8288 {
8289         struct ftrace_buffer_info *info = filp->private_data;
8290         struct trace_iterator *iter = &info->iter;
8291
8292         return trace_poll(iter, filp, poll_table);
8293 }
8294
8295 static ssize_t
8296 tracing_buffers_read(struct file *filp, char __user *ubuf,
8297                      size_t count, loff_t *ppos)
8298 {
8299         struct ftrace_buffer_info *info = filp->private_data;
8300         struct trace_iterator *iter = &info->iter;
8301         ssize_t ret = 0;
8302         ssize_t size;
8303
8304         if (!count)
8305                 return 0;
8306
8307 #ifdef CONFIG_TRACER_MAX_TRACE
8308         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8309                 return -EBUSY;
8310 #endif
8311
8312         if (!info->spare) {
8313                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8314                                                           iter->cpu_file);
8315                 if (IS_ERR(info->spare)) {
8316                         ret = PTR_ERR(info->spare);
8317                         info->spare = NULL;
8318                 } else {
8319                         info->spare_cpu = iter->cpu_file;
8320                 }
8321         }
8322         if (!info->spare)
8323                 return ret;
8324
8325         /* Do we have previous read data to read? */
8326         if (info->read < PAGE_SIZE)
8327                 goto read;
8328
8329  again:
8330         trace_access_lock(iter->cpu_file);
8331         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8332                                     &info->spare,
8333                                     count,
8334                                     iter->cpu_file, 0);
8335         trace_access_unlock(iter->cpu_file);
8336
8337         if (ret < 0) {
8338                 if (trace_empty(iter)) {
8339                         if ((filp->f_flags & O_NONBLOCK))
8340                                 return -EAGAIN;
8341
8342                         ret = wait_on_pipe(iter, 0);
8343                         if (ret)
8344                                 return ret;
8345
8346                         goto again;
8347                 }
8348                 return 0;
8349         }
8350
8351         info->read = 0;
8352  read:
8353         size = PAGE_SIZE - info->read;
8354         if (size > count)
8355                 size = count;
8356
8357         ret = copy_to_user(ubuf, info->spare + info->read, size);
8358         if (ret == size)
8359                 return -EFAULT;
8360
8361         size -= ret;
8362
8363         *ppos += size;
8364         info->read += size;
8365
8366         return size;
8367 }
8368
8369 static int tracing_buffers_release(struct inode *inode, struct file *file)
8370 {
8371         struct ftrace_buffer_info *info = file->private_data;
8372         struct trace_iterator *iter = &info->iter;
8373
8374         mutex_lock(&trace_types_lock);
8375
8376         iter->tr->trace_ref--;
8377
8378         __trace_array_put(iter->tr);
8379
8380         iter->wait_index++;
8381         /* Make sure the waiters see the new wait_index */
8382         smp_wmb();
8383
8384         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8385
8386         if (info->spare)
8387                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8388                                            info->spare_cpu, info->spare);
8389         kvfree(info);
8390
8391         mutex_unlock(&trace_types_lock);
8392
8393         return 0;
8394 }
8395
8396 struct buffer_ref {
8397         struct trace_buffer     *buffer;
8398         void                    *page;
8399         int                     cpu;
8400         refcount_t              refcount;
8401 };
8402
8403 static void buffer_ref_release(struct buffer_ref *ref)
8404 {
8405         if (!refcount_dec_and_test(&ref->refcount))
8406                 return;
8407         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8408         kfree(ref);
8409 }
8410
8411 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8412                                     struct pipe_buffer *buf)
8413 {
8414         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8415
8416         buffer_ref_release(ref);
8417         buf->private = 0;
8418 }
8419
8420 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8421                                 struct pipe_buffer *buf)
8422 {
8423         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8424
8425         if (refcount_read(&ref->refcount) > INT_MAX/2)
8426                 return false;
8427
8428         refcount_inc(&ref->refcount);
8429         return true;
8430 }
8431
8432 /* Pipe buffer operations for a buffer. */
8433 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8434         .release                = buffer_pipe_buf_release,
8435         .get                    = buffer_pipe_buf_get,
8436 };
8437
8438 /*
8439  * Callback from splice_to_pipe(), if we need to release some pages
8440  * at the end of the spd in case we errored out in filling the pipe.
8441  */
8442 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8443 {
8444         struct buffer_ref *ref =
8445                 (struct buffer_ref *)spd->partial[i].private;
8446
8447         buffer_ref_release(ref);
8448         spd->partial[i].private = 0;
8449 }
8450
8451 static ssize_t
8452 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8453                             struct pipe_inode_info *pipe, size_t len,
8454                             unsigned int flags)
8455 {
8456         struct ftrace_buffer_info *info = file->private_data;
8457         struct trace_iterator *iter = &info->iter;
8458         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8459         struct page *pages_def[PIPE_DEF_BUFFERS];
8460         struct splice_pipe_desc spd = {
8461                 .pages          = pages_def,
8462                 .partial        = partial_def,
8463                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8464                 .ops            = &buffer_pipe_buf_ops,
8465                 .spd_release    = buffer_spd_release,
8466         };
8467         struct buffer_ref *ref;
8468         int entries, i;
8469         ssize_t ret = 0;
8470
8471 #ifdef CONFIG_TRACER_MAX_TRACE
8472         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8473                 return -EBUSY;
8474 #endif
8475
8476         if (*ppos & (PAGE_SIZE - 1))
8477                 return -EINVAL;
8478
8479         if (len & (PAGE_SIZE - 1)) {
8480                 if (len < PAGE_SIZE)
8481                         return -EINVAL;
8482                 len &= PAGE_MASK;
8483         }
8484
8485         if (splice_grow_spd(pipe, &spd))
8486                 return -ENOMEM;
8487
8488  again:
8489         trace_access_lock(iter->cpu_file);
8490         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8491
8492         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8493                 struct page *page;
8494                 int r;
8495
8496                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8497                 if (!ref) {
8498                         ret = -ENOMEM;
8499                         break;
8500                 }
8501
8502                 refcount_set(&ref->refcount, 1);
8503                 ref->buffer = iter->array_buffer->buffer;
8504                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8505                 if (IS_ERR(ref->page)) {
8506                         ret = PTR_ERR(ref->page);
8507                         ref->page = NULL;
8508                         kfree(ref);
8509                         break;
8510                 }
8511                 ref->cpu = iter->cpu_file;
8512
8513                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8514                                           len, iter->cpu_file, 1);
8515                 if (r < 0) {
8516                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8517                                                    ref->page);
8518                         kfree(ref);
8519                         break;
8520                 }
8521
8522                 page = virt_to_page(ref->page);
8523
8524                 spd.pages[i] = page;
8525                 spd.partial[i].len = PAGE_SIZE;
8526                 spd.partial[i].offset = 0;
8527                 spd.partial[i].private = (unsigned long)ref;
8528                 spd.nr_pages++;
8529                 *ppos += PAGE_SIZE;
8530
8531                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8532         }
8533
8534         trace_access_unlock(iter->cpu_file);
8535         spd.nr_pages = i;
8536
8537         /* did we read anything? */
8538         if (!spd.nr_pages) {
8539                 long wait_index;
8540
8541                 if (ret)
8542                         goto out;
8543
8544                 ret = -EAGAIN;
8545                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8546                         goto out;
8547
8548                 wait_index = READ_ONCE(iter->wait_index);
8549
8550                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8551                 if (ret)
8552                         goto out;
8553
8554                 /* No need to wait after waking up when tracing is off */
8555                 if (!tracer_tracing_is_on(iter->tr))
8556                         goto out;
8557
8558                 /* Make sure we see the new wait_index */
8559                 smp_rmb();
8560                 if (wait_index != iter->wait_index)
8561                         goto out;
8562
8563                 goto again;
8564         }
8565
8566         ret = splice_to_pipe(pipe, &spd);
8567 out:
8568         splice_shrink_spd(&spd);
8569
8570         return ret;
8571 }
8572
8573 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8574 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8575 {
8576         struct ftrace_buffer_info *info = file->private_data;
8577         struct trace_iterator *iter = &info->iter;
8578
8579         if (cmd)
8580                 return -ENOIOCTLCMD;
8581
8582         mutex_lock(&trace_types_lock);
8583
8584         iter->wait_index++;
8585         /* Make sure the waiters see the new wait_index */
8586         smp_wmb();
8587
8588         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8589
8590         mutex_unlock(&trace_types_lock);
8591         return 0;
8592 }
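
/*
 * From user space this can be done with a plain ioctl() on a file
 * descriptor open on trace_pipe_raw (sketch):
 *
 *	ioctl(fd, 0);
 *
 * which wakes up any task blocked waiting for data on that buffer.
 */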
8593
8594 static const struct file_operations tracing_buffers_fops = {
8595         .open           = tracing_buffers_open,
8596         .read           = tracing_buffers_read,
8597         .poll           = tracing_buffers_poll,
8598         .release        = tracing_buffers_release,
8599         .splice_read    = tracing_buffers_splice_read,
8600         .unlocked_ioctl = tracing_buffers_ioctl,
8601         .llseek         = no_llseek,
8602 };
8603
8604 static ssize_t
8605 tracing_stats_read(struct file *filp, char __user *ubuf,
8606                    size_t count, loff_t *ppos)
8607 {
8608         struct inode *inode = file_inode(filp);
8609         struct trace_array *tr = inode->i_private;
8610         struct array_buffer *trace_buf = &tr->array_buffer;
8611         int cpu = tracing_get_cpu(inode);
8612         struct trace_seq *s;
8613         unsigned long cnt;
8614         unsigned long long t;
8615         unsigned long usec_rem;
8616
8617         s = kmalloc(sizeof(*s), GFP_KERNEL);
8618         if (!s)
8619                 return -ENOMEM;
8620
8621         trace_seq_init(s);
8622
8623         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8624         trace_seq_printf(s, "entries: %ld\n", cnt);
8625
8626         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8627         trace_seq_printf(s, "overrun: %ld\n", cnt);
8628
8629         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8630         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8631
8632         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8633         trace_seq_printf(s, "bytes: %ld\n", cnt);
8634
8635         if (trace_clocks[tr->clock_id].in_ns) {
8636                 /* local or global for trace_clock */
8637                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8638                 usec_rem = do_div(t, USEC_PER_SEC);
8639                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8640                                                                 t, usec_rem);
8641
8642                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8643                 usec_rem = do_div(t, USEC_PER_SEC);
8644                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8645         } else {
8646                 /* counter or tsc mode for trace_clock */
8647                 trace_seq_printf(s, "oldest event ts: %llu\n",
8648                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8649
8650                 trace_seq_printf(s, "now ts: %llu\n",
8651                                 ring_buffer_time_stamp(trace_buf->buffer));
8652         }
8653
8654         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8655         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8656
8657         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8658         trace_seq_printf(s, "read events: %ld\n", cnt);
8659
8660         count = simple_read_from_buffer(ubuf, count, ppos,
8661                                         s->buffer, trace_seq_used(s));
8662
8663         kfree(s);
8664
8665         return count;
8666 }
8667
8668 static const struct file_operations tracing_stats_fops = {
8669         .open           = tracing_open_generic_tr,
8670         .read           = tracing_stats_read,
8671         .llseek         = generic_file_llseek,
8672         .release        = tracing_release_generic_tr,
8673 };
8674
8675 #ifdef CONFIG_DYNAMIC_FTRACE
8676
8677 static ssize_t
8678 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8679                   size_t cnt, loff_t *ppos)
8680 {
8681         ssize_t ret;
8682         char *buf;
8683         int r;
8684
8685         /* 256 should be plenty to hold the amount needed */
8686         buf = kmalloc(256, GFP_KERNEL);
8687         if (!buf)
8688                 return -ENOMEM;
8689
8690         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8691                       ftrace_update_tot_cnt,
8692                       ftrace_number_of_pages,
8693                       ftrace_number_of_groups);
8694
8695         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8696         kfree(buf);
8697         return ret;
8698 }
8699
8700 static const struct file_operations tracing_dyn_info_fops = {
8701         .open           = tracing_open_generic,
8702         .read           = tracing_read_dyn_info,
8703         .llseek         = generic_file_llseek,
8704 };
8705 #endif /* CONFIG_DYNAMIC_FTRACE */
8706
8707 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8708 static void
8709 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8710                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8711                 void *data)
8712 {
8713         tracing_snapshot_instance(tr);
8714 }
8715
8716 static void
8717 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8718                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8719                       void *data)
8720 {
8721         struct ftrace_func_mapper *mapper = data;
8722         long *count = NULL;
8723
8724         if (mapper)
8725                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8726
8727         if (count) {
8728
8729                 if (*count <= 0)
8730                         return;
8731
8732                 (*count)--;
8733         }
8734
8735         tracing_snapshot_instance(tr);
8736 }
8737
8738 static int
8739 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8740                       struct ftrace_probe_ops *ops, void *data)
8741 {
8742         struct ftrace_func_mapper *mapper = data;
8743         long *count = NULL;
8744
8745         seq_printf(m, "%ps:", (void *)ip);
8746
8747         seq_puts(m, "snapshot");
8748
8749         if (mapper)
8750                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8751
8752         if (count)
8753                 seq_printf(m, ":count=%ld\n", *count);
8754         else
8755                 seq_puts(m, ":unlimited\n");
8756
8757         return 0;
8758 }
8759
8760 static int
8761 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8762                      unsigned long ip, void *init_data, void **data)
8763 {
8764         struct ftrace_func_mapper *mapper = *data;
8765
8766         if (!mapper) {
8767                 mapper = allocate_ftrace_func_mapper();
8768                 if (!mapper)
8769                         return -ENOMEM;
8770                 *data = mapper;
8771         }
8772
8773         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8774 }
8775
8776 static void
8777 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8778                      unsigned long ip, void *data)
8779 {
8780         struct ftrace_func_mapper *mapper = data;
8781
8782         if (!ip) {
8783                 if (!mapper)
8784                         return;
8785                 free_ftrace_func_mapper(mapper, NULL);
8786                 return;
8787         }
8788
8789         ftrace_func_mapper_remove_ip(mapper, ip);
8790 }
8791
8792 static struct ftrace_probe_ops snapshot_probe_ops = {
8793         .func                   = ftrace_snapshot,
8794         .print                  = ftrace_snapshot_print,
8795 };
8796
8797 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8798         .func                   = ftrace_count_snapshot,
8799         .print                  = ftrace_snapshot_print,
8800         .init                   = ftrace_snapshot_init,
8801         .free                   = ftrace_snapshot_free,
8802 };
8803
8804 static int
8805 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8806                                char *glob, char *cmd, char *param, int enable)
8807 {
8808         struct ftrace_probe_ops *ops;
8809         void *count = (void *)-1;
8810         char *number;
8811         int ret;
8812
8813         if (!tr)
8814                 return -ENODEV;
8815
8816         /* hash funcs only work with set_ftrace_filter */
8817         if (!enable)
8818                 return -EINVAL;
8819
8820         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8821
8822         if (glob[0] == '!')
8823                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8824
8825         if (!param)
8826                 goto out_reg;
8827
8828         number = strsep(&param, ":");
8829
8830         if (!strlen(number))
8831                 goto out_reg;
8832
8833         /*
8834          * We use the callback data field (which is a pointer)
8835          * as our counter.
8836          */
8837         ret = kstrtoul(number, 0, (unsigned long *)&count);
8838         if (ret)
8839                 return ret;
8840
8841  out_reg:
8842         ret = tracing_alloc_snapshot_instance(tr);
8843         if (ret < 0)
8844                 goto out;
8845
8846         ret = register_ftrace_function_probe(glob, tr, ops, count);
8847
8848  out:
8849         return ret < 0 ? ret : 0;
8850 }
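
/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter.  For example (illustrative function name):
 *
 *	echo 'do_fault:snapshot' > set_ftrace_filter
 *
 * takes a snapshot every time do_fault() is hit,
 *
 *	echo 'do_fault:snapshot:5' > set_ftrace_filter
 *
 * only does so for the first five hits, and
 *
 *	echo '!do_fault:snapshot' > set_ftrace_filter
 *
 * removes the probe again.
 */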
8851
8852 static struct ftrace_func_command ftrace_snapshot_cmd = {
8853         .name                   = "snapshot",
8854         .func                   = ftrace_trace_snapshot_callback,
8855 };
8856
8857 static __init int register_snapshot_cmd(void)
8858 {
8859         return register_ftrace_command(&ftrace_snapshot_cmd);
8860 }
8861 #else
8862 static inline __init int register_snapshot_cmd(void) { return 0; }
8863 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8864
8865 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8866 {
8867         if (WARN_ON(!tr->dir))
8868                 return ERR_PTR(-ENODEV);
8869
8870         /* Top directory uses NULL as the parent */
8871         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8872                 return NULL;
8873
8874         /* All sub buffers have a descriptor */
8875         return tr->dir;
8876 }
8877
8878 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8879 {
8880         struct dentry *d_tracer;
8881
8882         if (tr->percpu_dir)
8883                 return tr->percpu_dir;
8884
8885         d_tracer = tracing_get_dentry(tr);
8886         if (IS_ERR(d_tracer))
8887                 return NULL;
8888
8889         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8890
8891         MEM_FAIL(!tr->percpu_dir,
8892                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8893
8894         return tr->percpu_dir;
8895 }
8896
8897 static struct dentry *
8898 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8899                       void *data, long cpu, const struct file_operations *fops)
8900 {
8901         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8902
8903         if (ret) /* See tracing_get_cpu() */
8904                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8905         return ret;
8906 }
8907
8908 static void
8909 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8910 {
8911         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8912         struct dentry *d_cpu;
8913         char cpu_dir[30]; /* 30 characters should be more than enough */
8914
8915         if (!d_percpu)
8916                 return;
8917
8918         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8919         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8920         if (!d_cpu) {
8921                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8922                 return;
8923         }
8924
8925         /* per cpu trace_pipe */
8926         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8927                                 tr, cpu, &tracing_pipe_fops);
8928
8929         /* per cpu trace */
8930         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8931                                 tr, cpu, &tracing_fops);
8932
8933         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8934                                 tr, cpu, &tracing_buffers_fops);
8935
8936         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8937                                 tr, cpu, &tracing_stats_fops);
8938
8939         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8940                                 tr, cpu, &tracing_entries_fops);
8941
8942 #ifdef CONFIG_TRACER_SNAPSHOT
8943         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8944                                 tr, cpu, &snapshot_fops);
8945
8946         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8947                                 tr, cpu, &snapshot_raw_fops);
8948 #endif
8949 }
8950
8951 #ifdef CONFIG_FTRACE_SELFTEST
8952 /* Let selftest have access to static functions in this file */
8953 #include "trace_selftest.c"
8954 #endif
8955
8956 static ssize_t
8957 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8958                         loff_t *ppos)
8959 {
8960         struct trace_option_dentry *topt = filp->private_data;
8961         char *buf;
8962
8963         if (topt->flags->val & topt->opt->bit)
8964                 buf = "1\n";
8965         else
8966                 buf = "0\n";
8967
8968         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8969 }
8970
8971 static ssize_t
8972 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8973                          loff_t *ppos)
8974 {
8975         struct trace_option_dentry *topt = filp->private_data;
8976         unsigned long val;
8977         int ret;
8978
8979         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8980         if (ret)
8981                 return ret;
8982
8983         if (val != 0 && val != 1)
8984                 return -EINVAL;
8985
8986         if (!!(topt->flags->val & topt->opt->bit) != val) {
8987                 mutex_lock(&trace_types_lock);
8988                 ret = __set_tracer_option(topt->tr, topt->flags,
8989                                           topt->opt, !val);
8990                 mutex_unlock(&trace_types_lock);
8991                 if (ret)
8992                         return ret;
8993         }
8994
8995         *ppos += cnt;
8996
8997         return cnt;
8998 }
8999
9000 static int tracing_open_options(struct inode *inode, struct file *filp)
9001 {
9002         struct trace_option_dentry *topt = inode->i_private;
9003         int ret;
9004
9005         ret = tracing_check_open_get_tr(topt->tr);
9006         if (ret)
9007                 return ret;
9008
9009         filp->private_data = inode->i_private;
9010         return 0;
9011 }
9012
9013 static int tracing_release_options(struct inode *inode, struct file *file)
9014 {
9015         struct trace_option_dentry *topt = file->private_data;
9016
9017         trace_array_put(topt->tr);
9018         return 0;
9019 }
9020
9021 static const struct file_operations trace_options_fops = {
9022         .open = tracing_open_options,
9023         .read = trace_options_read,
9024         .write = trace_options_write,
9025         .llseek = generic_file_llseek,
9026         .release = tracing_release_options,
9027 };
9028
9029 /*
9030  * In order to pass in both the trace_array descriptor as well as the index
9031  * to the flag that the trace option file represents, the trace_array
9032  * has a character array of trace_flags_index[], which holds the index
9033  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9034  * The address of this character array is passed to the flag option file
9035  * read/write callbacks.
9036  *
9037  * In order to extract both the index and the trace_array descriptor,
9038  * get_tr_index() uses the following algorithm.
9039  *
9040  *   idx = *ptr;
9041  *
9042  * The pointer itself points into that index array, and the value it
9043  * points to is the index (remember index[1] == 1).
9044  *
9045  * Subtracting that index from the pointer then gives the start of the
9046  * index array itself.
9047  *
9048  *   ptr - idx == &index[0]
9049  *
9050  * Then a simple container_of() from that pointer gets us to the
9051  * trace_array descriptor.
9052  */
9053 static void get_tr_index(void *data, struct trace_array **ptr,
9054                          unsigned int *pindex)
9055 {
9056         *pindex = *(unsigned char *)data;
9057
9058         *ptr = container_of(data - *pindex, struct trace_array,
9059                             trace_flags_index);
9060 }
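
/*
 * Worked example for get_tr_index() (values purely illustrative): if
 * @data points at tr->trace_flags_index[3], then *pindex is read back
 * as 3 and data - 3 == &tr->trace_flags_index[0], from which
 * container_of() recovers the enclosing trace_array.
 */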
9061
9062 static ssize_t
9063 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9064                         loff_t *ppos)
9065 {
9066         void *tr_index = filp->private_data;
9067         struct trace_array *tr;
9068         unsigned int index;
9069         char *buf;
9070
9071         get_tr_index(tr_index, &tr, &index);
9072
9073         if (tr->trace_flags & (1 << index))
9074                 buf = "1\n";
9075         else
9076                 buf = "0\n";
9077
9078         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9079 }
9080
9081 static ssize_t
9082 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9083                          loff_t *ppos)
9084 {
9085         void *tr_index = filp->private_data;
9086         struct trace_array *tr;
9087         unsigned int index;
9088         unsigned long val;
9089         int ret;
9090
9091         get_tr_index(tr_index, &tr, &index);
9092
9093         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9094         if (ret)
9095                 return ret;
9096
9097         if (val != 0 && val != 1)
9098                 return -EINVAL;
9099
9100         mutex_lock(&event_mutex);
9101         mutex_lock(&trace_types_lock);
9102         ret = set_tracer_flag(tr, 1 << index, val);
9103         mutex_unlock(&trace_types_lock);
9104         mutex_unlock(&event_mutex);
9105
9106         if (ret < 0)
9107                 return ret;
9108
9109         *ppos += cnt;
9110
9111         return cnt;
9112 }
9113
9114 static const struct file_operations trace_options_core_fops = {
9115         .open = tracing_open_generic,
9116         .read = trace_options_core_read,
9117         .write = trace_options_core_write,
9118         .llseek = generic_file_llseek,
9119 };
9120
9121 struct dentry *trace_create_file(const char *name,
9122                                  umode_t mode,
9123                                  struct dentry *parent,
9124                                  void *data,
9125                                  const struct file_operations *fops)
9126 {
9127         struct dentry *ret;
9128
9129         ret = tracefs_create_file(name, mode, parent, data, fops);
9130         if (!ret)
9131                 pr_warn("Could not create tracefs '%s' entry\n", name);
9132
9133         return ret;
9134 }
9135
9136
9137 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9138 {
9139         struct dentry *d_tracer;
9140
9141         if (tr->options)
9142                 return tr->options;
9143
9144         d_tracer = tracing_get_dentry(tr);
9145         if (IS_ERR(d_tracer))
9146                 return NULL;
9147
9148         tr->options = tracefs_create_dir("options", d_tracer);
9149         if (!tr->options) {
9150                 pr_warn("Could not create tracefs directory 'options'\n");
9151                 return NULL;
9152         }
9153
9154         return tr->options;
9155 }
9156
9157 static void
9158 create_trace_option_file(struct trace_array *tr,
9159                          struct trace_option_dentry *topt,
9160                          struct tracer_flags *flags,
9161                          struct tracer_opt *opt)
9162 {
9163         struct dentry *t_options;
9164
9165         t_options = trace_options_init_dentry(tr);
9166         if (!t_options)
9167                 return;
9168
9169         topt->flags = flags;
9170         topt->opt = opt;
9171         topt->tr = tr;
9172
9173         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9174                                         t_options, topt, &trace_options_fops);
9175
9176 }
9177
9178 static void
9179 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9180 {
9181         struct trace_option_dentry *topts;
9182         struct trace_options *tr_topts;
9183         struct tracer_flags *flags;
9184         struct tracer_opt *opts;
9185         int cnt;
9186         int i;
9187
9188         if (!tracer)
9189                 return;
9190
9191         flags = tracer->flags;
9192
9193         if (!flags || !flags->opts)
9194                 return;
9195
9196         /*
9197          * If this is an instance, only create flags for tracers
9198          * the instance may have.
9199          */
9200         if (!trace_ok_for_array(tracer, tr))
9201                 return;
9202
9203         for (i = 0; i < tr->nr_topts; i++) {
9204                 /* Make sure there are no duplicate flags. */
9205                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9206                         return;
9207         }
9208
9209         opts = flags->opts;
9210
9211         for (cnt = 0; opts[cnt].name; cnt++)
9212                 ;
9213
9214         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9215         if (!topts)
9216                 return;
9217
9218         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9219                             GFP_KERNEL);
9220         if (!tr_topts) {
9221                 kfree(topts);
9222                 return;
9223         }
9224
9225         tr->topts = tr_topts;
9226         tr->topts[tr->nr_topts].tracer = tracer;
9227         tr->topts[tr->nr_topts].topts = topts;
9228         tr->nr_topts++;
9229
9230         for (cnt = 0; opts[cnt].name; cnt++) {
9231                 create_trace_option_file(tr, &topts[cnt], flags,
9232                                          &opts[cnt]);
9233                 MEM_FAIL(topts[cnt].entry == NULL,
9234                           "Failed to create trace option: %s",
9235                           opts[cnt].name);
9236         }
9237 }
9238
9239 static struct dentry *
9240 create_trace_option_core_file(struct trace_array *tr,
9241                               const char *option, long index)
9242 {
9243         struct dentry *t_options;
9244
9245         t_options = trace_options_init_dentry(tr);
9246         if (!t_options)
9247                 return NULL;
9248
9249         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9250                                  (void *)&tr->trace_flags_index[index],
9251                                  &trace_options_core_fops);
9252 }
9253
9254 static void create_trace_options_dir(struct trace_array *tr)
9255 {
9256         struct dentry *t_options;
9257         bool top_level = tr == &global_trace;
9258         int i;
9259
9260         t_options = trace_options_init_dentry(tr);
9261         if (!t_options)
9262                 return;
9263
9264         for (i = 0; trace_options[i]; i++) {
9265                 if (top_level ||
9266                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9267                         create_trace_option_core_file(tr, trace_options[i], i);
9268         }
9269 }
9270
9271 static ssize_t
9272 rb_simple_read(struct file *filp, char __user *ubuf,
9273                size_t cnt, loff_t *ppos)
9274 {
9275         struct trace_array *tr = filp->private_data;
9276         char buf[64];
9277         int r;
9278
9279         r = tracer_tracing_is_on(tr);
9280         r = sprintf(buf, "%d\n", r);
9281
9282         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9283 }
9284
9285 static ssize_t
9286 rb_simple_write(struct file *filp, const char __user *ubuf,
9287                 size_t cnt, loff_t *ppos)
9288 {
9289         struct trace_array *tr = filp->private_data;
9290         struct trace_buffer *buffer = tr->array_buffer.buffer;
9291         unsigned long val;
9292         int ret;
9293
9294         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9295         if (ret)
9296                 return ret;
9297
9298         if (buffer) {
9299                 mutex_lock(&trace_types_lock);
9300                 if (!!val == tracer_tracing_is_on(tr)) {
9301                         val = 0; /* do nothing */
9302                 } else if (val) {
9303                         tracer_tracing_on(tr);
9304                         if (tr->current_trace->start)
9305                                 tr->current_trace->start(tr);
9306                 } else {
9307                         tracer_tracing_off(tr);
9308                         if (tr->current_trace->stop)
9309                                 tr->current_trace->stop(tr);
9310                         /* Wake up any waiters */
9311                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9312                 }
9313                 mutex_unlock(&trace_types_lock);
9314         }
9315
9316         (*ppos)++;
9317
9318         return cnt;
9319 }
9320
9321 static const struct file_operations rb_simple_fops = {
9322         .open           = tracing_open_generic_tr,
9323         .read           = rb_simple_read,
9324         .write          = rb_simple_write,
9325         .release        = tracing_release_generic_tr,
9326         .llseek         = default_llseek,
9327 };
9328
9329 static ssize_t
9330 buffer_percent_read(struct file *filp, char __user *ubuf,
9331                     size_t cnt, loff_t *ppos)
9332 {
9333         struct trace_array *tr = filp->private_data;
9334         char buf[64];
9335         int r;
9336
9337         r = tr->buffer_percent;
9338         r = sprintf(buf, "%d\n", r);
9339
9340         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9341 }
9342
9343 static ssize_t
9344 buffer_percent_write(struct file *filp, const char __user *ubuf,
9345                      size_t cnt, loff_t *ppos)
9346 {
9347         struct trace_array *tr = filp->private_data;
9348         unsigned long val;
9349         int ret;
9350
9351         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9352         if (ret)
9353                 return ret;
9354
9355         if (val > 100)
9356                 return -EINVAL;
9357
9358         tr->buffer_percent = val;
9359
9360         (*ppos)++;
9361
9362         return cnt;
9363 }
9364
9365 static const struct file_operations buffer_percent_fops = {
9366         .open           = tracing_open_generic_tr,
9367         .read           = buffer_percent_read,
9368         .write          = buffer_percent_write,
9369         .release        = tracing_release_generic_tr,
9370         .llseek         = default_llseek,
9371 };
9372
9373 static struct dentry *trace_instance_dir;
9374
9375 static void
9376 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9377
9378 static int
9379 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9380 {
9381         enum ring_buffer_flags rb_flags;
9382
9383         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9384
9385         buf->tr = tr;
9386
9387         buf->buffer = ring_buffer_alloc(size, rb_flags);
9388         if (!buf->buffer)
9389                 return -ENOMEM;
9390
9391         buf->data = alloc_percpu(struct trace_array_cpu);
9392         if (!buf->data) {
9393                 ring_buffer_free(buf->buffer);
9394                 buf->buffer = NULL;
9395                 return -ENOMEM;
9396         }
9397
9398         /* Allocate the first page for all buffers */
9399         set_buffer_entries(&tr->array_buffer,
9400                            ring_buffer_size(tr->array_buffer.buffer, 0));
9401
9402         return 0;
9403 }
9404
9405 static void free_trace_buffer(struct array_buffer *buf)
9406 {
9407         if (buf->buffer) {
9408                 ring_buffer_free(buf->buffer);
9409                 buf->buffer = NULL;
9410                 free_percpu(buf->data);
9411                 buf->data = NULL;
9412         }
9413 }
9414
9415 static int allocate_trace_buffers(struct trace_array *tr, int size)
9416 {
9417         int ret;
9418
9419         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9420         if (ret)
9421                 return ret;
9422
9423 #ifdef CONFIG_TRACER_MAX_TRACE
9424         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9425                                     allocate_snapshot ? size : 1);
9426         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9427                 free_trace_buffer(&tr->array_buffer);
9428                 return -ENOMEM;
9429         }
9430         tr->allocated_snapshot = allocate_snapshot;
9431
9432         allocate_snapshot = false;
9433 #endif
9434
9435         return 0;
9436 }
9437
9438 static void free_trace_buffers(struct trace_array *tr)
9439 {
9440         if (!tr)
9441                 return;
9442
9443         free_trace_buffer(&tr->array_buffer);
9444
9445 #ifdef CONFIG_TRACER_MAX_TRACE
9446         free_trace_buffer(&tr->max_buffer);
9447 #endif
9448 }
9449
9450 static void init_trace_flags_index(struct trace_array *tr)
9451 {
9452         int i;
9453
9454         /* Used by the trace options files */
9455         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9456                 tr->trace_flags_index[i] = i;
9457 }
9458
9459 static void __update_tracer_options(struct trace_array *tr)
9460 {
9461         struct tracer *t;
9462
9463         for (t = trace_types; t; t = t->next)
9464                 add_tracer_options(tr, t);
9465 }
9466
9467 static void update_tracer_options(struct trace_array *tr)
9468 {
9469         mutex_lock(&trace_types_lock);
9470         tracer_options_updated = true;
9471         __update_tracer_options(tr);
9472         mutex_unlock(&trace_types_lock);
9473 }
9474
9475 /* Must have trace_types_lock held */
9476 struct trace_array *trace_array_find(const char *instance)
9477 {
9478         struct trace_array *tr, *found = NULL;
9479
9480         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9481                 if (tr->name && strcmp(tr->name, instance) == 0) {
9482                         found = tr;
9483                         break;
9484                 }
9485         }
9486
9487         return found;
9488 }
9489
9490 struct trace_array *trace_array_find_get(const char *instance)
9491 {
9492         struct trace_array *tr;
9493
9494         mutex_lock(&trace_types_lock);
9495         tr = trace_array_find(instance);
9496         if (tr)
9497                 tr->ref++;
9498         mutex_unlock(&trace_types_lock);
9499
9500         return tr;
9501 }
9502
9503 static int trace_array_create_dir(struct trace_array *tr)
9504 {
9505         int ret;
9506
9507         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9508         if (!tr->dir)
9509                 return -EINVAL;
9510
9511         ret = event_trace_add_tracer(tr->dir, tr);
9512         if (ret) {
9513                 tracefs_remove(tr->dir);
9514                 return ret;
9515         }
9516
9517         init_tracer_tracefs(tr, tr->dir);
9518         __update_tracer_options(tr);
9519
9520         return ret;
9521 }
9522
9523 static struct trace_array *trace_array_create(const char *name)
9524 {
9525         struct trace_array *tr;
9526         int ret;
9527
9528         ret = -ENOMEM;
9529         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9530         if (!tr)
9531                 return ERR_PTR(ret);
9532
9533         tr->name = kstrdup(name, GFP_KERNEL);
9534         if (!tr->name)
9535                 goto out_free_tr;
9536
9537         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9538                 goto out_free_tr;
9539
9540         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9541                 goto out_free_tr;
9542
9543         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9544
9545         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9546
9547         raw_spin_lock_init(&tr->start_lock);
9548
9549         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9550
9551         tr->current_trace = &nop_trace;
9552
9553         INIT_LIST_HEAD(&tr->systems);
9554         INIT_LIST_HEAD(&tr->events);
9555         INIT_LIST_HEAD(&tr->hist_vars);
9556         INIT_LIST_HEAD(&tr->err_log);
9557
9558         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9559                 goto out_free_tr;
9560
9561         /* The ring buffer is expanded by default */
9562         trace_set_ring_buffer_expanded(tr);
9563
9564         if (ftrace_allocate_ftrace_ops(tr) < 0)
9565                 goto out_free_tr;
9566
9567         ftrace_init_trace_array(tr);
9568
9569         init_trace_flags_index(tr);
9570
9571         if (trace_instance_dir) {
9572                 ret = trace_array_create_dir(tr);
9573                 if (ret)
9574                         goto out_free_tr;
9575         } else
9576                 __trace_early_add_events(tr);
9577
9578         list_add(&tr->list, &ftrace_trace_arrays);
9579
9580         tr->ref++;
9581
9582         return tr;
9583
9584  out_free_tr:
9585         ftrace_free_ftrace_ops(tr);
9586         free_trace_buffers(tr);
9587         free_cpumask_var(tr->pipe_cpumask);
9588         free_cpumask_var(tr->tracing_cpumask);
9589         kfree(tr->name);
9590         kfree(tr);
9591
9592         return ERR_PTR(ret);
9593 }
9594
9595 static int instance_mkdir(const char *name)
9596 {
9597         struct trace_array *tr;
9598         int ret;
9599
9600         mutex_lock(&event_mutex);
9601         mutex_lock(&trace_types_lock);
9602
9603         ret = -EEXIST;
9604         if (trace_array_find(name))
9605                 goto out_unlock;
9606
9607         tr = trace_array_create(name);
9608
9609         ret = PTR_ERR_OR_ZERO(tr);
9610
9611 out_unlock:
9612         mutex_unlock(&trace_types_lock);
9613         mutex_unlock(&event_mutex);
9614         return ret;
9615 }
9616
9617 /**
9618  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9619  * @name: The name of the trace array to be looked up/created.
9620  *
9621  * Returns a pointer to the trace array with the given name, creating it
9622  * if it does not already exist. Returns NULL if it cannot be created.
9623  *
9624  * NOTE: This function increments the reference counter associated with the
9625  * trace array returned. This makes sure it cannot be freed while in use.
9626  * Use trace_array_put() once the trace array is no longer needed.
9627  * If the trace_array is to be freed, trace_array_destroy() needs to
9628  * be called after the trace_array_put(), or simply let user space delete
9629  * it from the tracefs instances directory. But until the
9630  * trace_array_put() is called, user space cannot delete it.
9631  *
9632  */
9633 struct trace_array *trace_array_get_by_name(const char *name)
9634 {
9635         struct trace_array *tr;
9636
9637         mutex_lock(&event_mutex);
9638         mutex_lock(&trace_types_lock);
9639
9640         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9641                 if (tr->name && strcmp(tr->name, name) == 0)
9642                         goto out_unlock;
9643         }
9644
9645         tr = trace_array_create(name);
9646
9647         if (IS_ERR(tr))
9648                 tr = NULL;
9649 out_unlock:
9650         if (tr)
9651                 tr->ref++;
9652
9653         mutex_unlock(&trace_types_lock);
9654         mutex_unlock(&event_mutex);
9655         return tr;
9656 }
9657 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
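
/*
 * A rough usage sketch (the instance name "my_module" is only an example):
 * a kernel module can use the interface above to create its own trace
 * instance and drop it again when it is done.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_module");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... use the instance: enable events on it, write into it, etc. ...
 *
 *	trace_array_put(tr);		(drop the reference taken above)
 *	trace_array_destroy(tr);	(remove the instance entirely)
 */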
9658
9659 static int __remove_instance(struct trace_array *tr)
9660 {
9661         int i;
9662
9663         /* Reference counter for a newly created trace array = 1. */
9664         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9665                 return -EBUSY;
9666
9667         list_del(&tr->list);
9668
9669         /* Disable all the flags that were enabled coming in */
9670         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9671                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9672                         set_tracer_flag(tr, 1 << i, 0);
9673         }
9674
9675         tracing_set_nop(tr);
9676         clear_ftrace_function_probes(tr);
9677         event_trace_del_tracer(tr);
9678         ftrace_clear_pids(tr);
9679         ftrace_destroy_function_files(tr);
9680         tracefs_remove(tr->dir);
9681         free_percpu(tr->last_func_repeats);
9682         free_trace_buffers(tr);
9683         clear_tracing_err_log(tr);
9684
9685         for (i = 0; i < tr->nr_topts; i++) {
9686                 kfree(tr->topts[i].topts);
9687         }
9688         kfree(tr->topts);
9689
9690         free_cpumask_var(tr->pipe_cpumask);
9691         free_cpumask_var(tr->tracing_cpumask);
9692         kfree(tr->name);
9693         kfree(tr);
9694
9695         return 0;
9696 }
9697
9698 int trace_array_destroy(struct trace_array *this_tr)
9699 {
9700         struct trace_array *tr;
9701         int ret;
9702
9703         if (!this_tr)
9704                 return -EINVAL;
9705
9706         mutex_lock(&event_mutex);
9707         mutex_lock(&trace_types_lock);
9708
9709         ret = -ENODEV;
9710
9711         /* Make sure the trace array exists before destroying it. */
9712         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9713                 if (tr == this_tr) {
9714                         ret = __remove_instance(tr);
9715                         break;
9716                 }
9717         }
9718
9719         mutex_unlock(&trace_types_lock);
9720         mutex_unlock(&event_mutex);
9721
9722         return ret;
9723 }
9724 EXPORT_SYMBOL_GPL(trace_array_destroy);
9725
9726 static int instance_rmdir(const char *name)
9727 {
9728         struct trace_array *tr;
9729         int ret;
9730
9731         mutex_lock(&event_mutex);
9732         mutex_lock(&trace_types_lock);
9733
9734         ret = -ENODEV;
9735         tr = trace_array_find(name);
9736         if (tr)
9737                 ret = __remove_instance(tr);
9738
9739         mutex_unlock(&trace_types_lock);
9740         mutex_unlock(&event_mutex);
9741
9742         return ret;
9743 }
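
/*
 * instance_mkdir() and instance_rmdir() back the tracefs "instances"
 * directory (see tracefs_create_instance_dir() below), so instances are
 * created and removed from user space with plain mkdir/rmdir, e.g.:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still has references or
 * active trace users (see __remove_instance() above).
 */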
9744
9745 static __init void create_trace_instances(struct dentry *d_tracer)
9746 {
9747         struct trace_array *tr;
9748
9749         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9750                                                          instance_mkdir,
9751                                                          instance_rmdir);
9752         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9753                 return;
9754
9755         mutex_lock(&event_mutex);
9756         mutex_lock(&trace_types_lock);
9757
9758         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9759                 if (!tr->name)
9760                         continue;
9761                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9762                              "Failed to create instance directory\n"))
9763                         break;
9764         }
9765
9766         mutex_unlock(&trace_types_lock);
9767         mutex_unlock(&event_mutex);
9768 }
9769
9770 static void
9771 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9772 {
9773         int cpu;
9774
9775         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9776                         tr, &show_traces_fops);
9777
9778         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9779                         tr, &set_tracer_fops);
9780
9781         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9782                           tr, &tracing_cpumask_fops);
9783
9784         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9785                           tr, &tracing_iter_fops);
9786
9787         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9788                           tr, &tracing_fops);
9789
9790         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9791                           tr, &tracing_pipe_fops);
9792
9793         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9794                           tr, &tracing_entries_fops);
9795
9796         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9797                           tr, &tracing_total_entries_fops);
9798
9799         trace_create_file("free_buffer", 0200, d_tracer,
9800                           tr, &tracing_free_buffer_fops);
9801
9802         trace_create_file("trace_marker", 0220, d_tracer,
9803                           tr, &tracing_mark_fops);
9804
9805         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9806
9807         trace_create_file("trace_marker_raw", 0220, d_tracer,
9808                           tr, &tracing_mark_raw_fops);
9809
9810         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9811                           &trace_clock_fops);
9812
9813         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9814                           tr, &rb_simple_fops);
9815
9816         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9817                           &trace_time_stamp_mode_fops);
9818
9819         tr->buffer_percent = 50;
9820
9821         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9822                         tr, &buffer_percent_fops);
9823
9824         create_trace_options_dir(tr);
9825
9826 #ifdef CONFIG_TRACER_MAX_TRACE
9827         trace_create_maxlat_file(tr, d_tracer);
9828 #endif
9829
9830         if (ftrace_create_function_files(tr, d_tracer))
9831                 MEM_FAIL(1, "Could not allocate function filter files");
9832
9833 #ifdef CONFIG_TRACER_SNAPSHOT
9834         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9835                           tr, &snapshot_fops);
9836 #endif
9837
9838         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9839                           tr, &tracing_err_log_fops);
9840
9841         for_each_tracing_cpu(cpu)
9842                 tracing_init_tracefs_percpu(tr, cpu);
9843
9844         ftrace_init_tracefs(tr, d_tracer);
9845 }
9846
9847 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9848 {
9849         struct vfsmount *mnt;
9850         struct file_system_type *type;
9851
9852         /*
9853          * To maintain backward compatibility for tools that mount
9854          * debugfs to get to the tracing facility, tracefs is automatically
9855          * mounted to the debugfs/tracing directory.
9856          */
9857         type = get_fs_type("tracefs");
9858         if (!type)
9859                 return NULL;
9860         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9861         put_filesystem(type);
9862         if (IS_ERR(mnt))
9863                 return NULL;
9864         mntget(mnt);
9865
9866         return mnt;
9867 }
9868
9869 /**
9870  * tracing_init_dentry - initialize top level trace array
9871  *
9872  * This is called when creating files or directories in the tracing
9873  * directory. It is called via fs_initcall() by any of the boot up code
9874  * and returns zero on success, or a negative error code on failure.
9875  */
9876 int tracing_init_dentry(void)
9877 {
9878         struct trace_array *tr = &global_trace;
9879
9880         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9881                 pr_warn("Tracing disabled due to lockdown\n");
9882                 return -EPERM;
9883         }
9884
9885         /* The top level trace array uses NULL as parent */
9886         if (tr->dir)
9887                 return 0;
9888
9889         if (WARN_ON(!tracefs_initialized()))
9890                 return -ENODEV;
9891
9892         /*
9893          * As there may still be users that expect the tracing
9894          * files to exist in debugfs/tracing, we must automount
9895          * the tracefs file system there, so older tools still
9896          * work with the newer kernel.
9897          */
9898         tr->dir = debugfs_create_automount("tracing", NULL,
9899                                            trace_automount, NULL);
9900
9901         return 0;
9902 }
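
/*
 * With the automount above in place, both of the following show the same
 * tracing directory (the second one triggers the automount of tracefs on
 * top of debugfs):
 *
 *	# ls /sys/kernel/tracing
 *	# ls /sys/kernel/debug/tracing
 */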
9903
9904 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9905 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9906
9907 static struct workqueue_struct *eval_map_wq __initdata;
9908 static struct work_struct eval_map_work __initdata;
9909 static struct work_struct tracerfs_init_work __initdata;
9910
9911 static void __init eval_map_work_func(struct work_struct *work)
9912 {
9913         int len;
9914
9915         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9916         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9917 }
9918
9919 static int __init trace_eval_init(void)
9920 {
9921         INIT_WORK(&eval_map_work, eval_map_work_func);
9922
9923         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9924         if (!eval_map_wq) {
9925                 pr_err("Unable to allocate eval_map_wq\n");
9926                 /* Do work here */
9927                 eval_map_work_func(&eval_map_work);
9928                 return -ENOMEM;
9929         }
9930
9931         queue_work(eval_map_wq, &eval_map_work);
9932         return 0;
9933 }
9934
9935 subsys_initcall(trace_eval_init);
9936
9937 static int __init trace_eval_sync(void)
9938 {
9939         /* Make sure the eval map updates are finished */
9940         if (eval_map_wq)
9941                 destroy_workqueue(eval_map_wq);
9942         return 0;
9943 }
9944
9945 late_initcall_sync(trace_eval_sync);
9946
9947
9948 #ifdef CONFIG_MODULES
9949 static void trace_module_add_evals(struct module *mod)
9950 {
9951         if (!mod->num_trace_evals)
9952                 return;
9953
9954         /*
9955          * Modules with bad taint do not have events created; do
9956          * not bother with enums either.
9957          */
9958         if (trace_module_has_bad_taint(mod))
9959                 return;
9960
9961         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9962 }
9963
9964 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9965 static void trace_module_remove_evals(struct module *mod)
9966 {
9967         union trace_eval_map_item *map;
9968         union trace_eval_map_item **last = &trace_eval_maps;
9969
9970         if (!mod->num_trace_evals)
9971                 return;
9972
9973         mutex_lock(&trace_eval_mutex);
9974
9975         map = trace_eval_maps;
9976
9977         while (map) {
9978                 if (map->head.mod == mod)
9979                         break;
9980                 map = trace_eval_jmp_to_tail(map);
9981                 last = &map->tail.next;
9982                 map = map->tail.next;
9983         }
9984         if (!map)
9985                 goto out;
9986
9987         *last = trace_eval_jmp_to_tail(map)->tail.next;
9988         kfree(map);
9989  out:
9990         mutex_unlock(&trace_eval_mutex);
9991 }
9992 #else
9993 static inline void trace_module_remove_evals(struct module *mod) { }
9994 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9995
9996 static int trace_module_notify(struct notifier_block *self,
9997                                unsigned long val, void *data)
9998 {
9999         struct module *mod = data;
10000
10001         switch (val) {
10002         case MODULE_STATE_COMING:
10003                 trace_module_add_evals(mod);
10004                 break;
10005         case MODULE_STATE_GOING:
10006                 trace_module_remove_evals(mod);
10007                 break;
10008         }
10009
10010         return NOTIFY_OK;
10011 }
10012
10013 static struct notifier_block trace_module_nb = {
10014         .notifier_call = trace_module_notify,
10015         .priority = 0,
10016 };
10017 #endif /* CONFIG_MODULES */
10018
10019 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10020 {
10021
10022         event_trace_init();
10023
10024         init_tracer_tracefs(&global_trace, NULL);
10025         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10026
10027         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10028                         &global_trace, &tracing_thresh_fops);
10029
10030         trace_create_file("README", TRACE_MODE_READ, NULL,
10031                         NULL, &tracing_readme_fops);
10032
10033         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10034                         NULL, &tracing_saved_cmdlines_fops);
10035
10036         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10037                           NULL, &tracing_saved_cmdlines_size_fops);
10038
10039         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10040                         NULL, &tracing_saved_tgids_fops);
10041
10042         trace_create_eval_file(NULL);
10043
10044 #ifdef CONFIG_MODULES
10045         register_module_notifier(&trace_module_nb);
10046 #endif
10047
10048 #ifdef CONFIG_DYNAMIC_FTRACE
10049         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10050                         NULL, &tracing_dyn_info_fops);
10051 #endif
10052
10053         create_trace_instances(NULL);
10054
10055         update_tracer_options(&global_trace);
10056 }
10057
10058 static __init int tracer_init_tracefs(void)
10059 {
10060         int ret;
10061
10062         trace_access_lock_init();
10063
10064         ret = tracing_init_dentry();
10065         if (ret)
10066                 return 0;
10067
10068         if (eval_map_wq) {
10069                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10070                 queue_work(eval_map_wq, &tracerfs_init_work);
10071         } else {
10072                 tracer_init_tracefs_work_func(NULL);
10073         }
10074
10075         rv_init_interface();
10076
10077         return 0;
10078 }
10079
10080 fs_initcall(tracer_init_tracefs);
10081
10082 static int trace_die_panic_handler(struct notifier_block *self,
10083                                 unsigned long ev, void *unused);
10084
10085 static struct notifier_block trace_panic_notifier = {
10086         .notifier_call = trace_die_panic_handler,
10087         .priority = INT_MAX - 1,
10088 };
10089
10090 static struct notifier_block trace_die_notifier = {
10091         .notifier_call = trace_die_panic_handler,
10092         .priority = INT_MAX - 1,
10093 };
10094
10095 /*
10096  * The idea is to execute the following die/panic callback early, in order
10097  * to avoid showing irrelevant information in the trace (like other panic
10098  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10099  * warnings get disabled (to prevent potential log flooding).
10100  */
10101 static int trace_die_panic_handler(struct notifier_block *self,
10102                                 unsigned long ev, void *unused)
10103 {
10104         if (!ftrace_dump_on_oops)
10105                 return NOTIFY_DONE;
10106
10107         /* The die notifier requires DIE_OOPS to trigger */
10108         if (self == &trace_die_notifier && ev != DIE_OOPS)
10109                 return NOTIFY_DONE;
10110
10111         ftrace_dump(ftrace_dump_on_oops);
10112
10113         return NOTIFY_DONE;
10114 }
10115
10116 /*
10117  * printk is limited to a max of 1024; we really don't need it that big.
10118  * Nothing should be printing 1000 characters anyway.
10119  */
10120 #define TRACE_MAX_PRINT         1000
10121
10122 /*
10123  * Define here KERN_TRACE so that we have one place to modify
10124  * it if we decide to change what log level the ftrace dump
10125  * should be at.
10126  */
10127 #define KERN_TRACE              KERN_EMERG
10128
10129 void
10130 trace_printk_seq(struct trace_seq *s)
10131 {
10132         /* Probably should print a warning here. */
10133         if (s->seq.len >= TRACE_MAX_PRINT)
10134                 s->seq.len = TRACE_MAX_PRINT;
10135
10136         /*
10137          * More paranoid code. Although the buffer size is set to
10138          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10139          * an extra layer of protection.
10140          */
10141         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10142                 s->seq.len = s->seq.size - 1;
10143
10144         /* Should be NUL terminated, but we are paranoid. */
10145         s->buffer[s->seq.len] = 0;
10146
10147         printk(KERN_TRACE "%s", s->buffer);
10148
10149         trace_seq_init(s);
10150 }
10151
10152 void trace_init_global_iter(struct trace_iterator *iter)
10153 {
10154         iter->tr = &global_trace;
10155         iter->trace = iter->tr->current_trace;
10156         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10157         iter->array_buffer = &global_trace.array_buffer;
10158
10159         if (iter->trace && iter->trace->open)
10160                 iter->trace->open(iter);
10161
10162         /* Annotate start of buffers if we had overruns */
10163         if (ring_buffer_overruns(iter->array_buffer->buffer))
10164                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10165
10166         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10167         if (trace_clocks[iter->tr->clock_id].in_ns)
10168                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10169
10170         /* Can not use kmalloc for iter.temp and iter.fmt */
10171         iter->temp = static_temp_buf;
10172         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10173         iter->fmt = static_fmt_buf;
10174         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10175 }
10176
10177 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10178 {
10179         /* use static because iter can be a bit big for the stack */
10180         static struct trace_iterator iter;
10181         static atomic_t dump_running;
10182         struct trace_array *tr = &global_trace;
10183         unsigned int old_userobj;
10184         unsigned long flags;
10185         int cnt = 0, cpu;
10186
10187         /* Only allow one dump user at a time. */
10188         if (atomic_inc_return(&dump_running) != 1) {
10189                 atomic_dec(&dump_running);
10190                 return;
10191         }
10192
10193         /*
10194          * Always turn off tracing when we dump.
10195          * We don't need to show trace output of what happens
10196          * between multiple crashes.
10197          *
10198          * If the user does a sysrq-z, then they can re-enable
10199          * tracing with echo 1 > tracing_on.
10200          */
10201         tracing_off();
10202
10203         local_irq_save(flags);
10204
10205         /* Simulate the iterator */
10206         trace_init_global_iter(&iter);
10207
10208         for_each_tracing_cpu(cpu) {
10209                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10210         }
10211
10212         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10213
10214         /* don't look at user memory in panic mode */
10215         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10216
10217         switch (oops_dump_mode) {
10218         case DUMP_ALL:
10219                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10220                 break;
10221         case DUMP_ORIG:
10222                 iter.cpu_file = raw_smp_processor_id();
10223                 break;
10224         case DUMP_NONE:
10225                 goto out_enable;
10226         default:
10227                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10228                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10229         }
10230
10231         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10232
10233         /* Did function tracer already get disabled? */
10234         if (ftrace_is_dead()) {
10235                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10236                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10237         }
10238
10239         /*
10240  * We need to stop all tracing on all CPUs to read
10241  * the next buffer. This is a bit expensive, but is
10242  * not done often. We read all that we can,
10243          * and then release the locks again.
10244          */
10245
10246         while (!trace_empty(&iter)) {
10247
10248                 if (!cnt)
10249                         printk(KERN_TRACE "---------------------------------\n");
10250
10251                 cnt++;
10252
10253                 trace_iterator_reset(&iter);
10254                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10255
10256                 if (trace_find_next_entry_inc(&iter) != NULL) {
10257                         int ret;
10258
10259                         ret = print_trace_line(&iter);
10260                         if (ret != TRACE_TYPE_NO_CONSUME)
10261                                 trace_consume(&iter);
10262                 }
10263                 touch_nmi_watchdog();
10264
10265                 trace_printk_seq(&iter.seq);
10266         }
10267
10268         if (!cnt)
10269                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10270         else
10271                 printk(KERN_TRACE "---------------------------------\n");
10272
10273  out_enable:
10274         tr->trace_flags |= old_userobj;
10275
10276         for_each_tracing_cpu(cpu) {
10277                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10278         }
10279         atomic_dec(&dump_running);
10280         local_irq_restore(flags);
10281 }
10282 EXPORT_SYMBOL_GPL(ftrace_dump);
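
/*
 * A rough sketch of calling ftrace_dump() directly from kernel code that is
 * about to fail hard, to push the ring buffer contents out to the console
 * before the state is lost (the condition below is hypothetical):
 *
 *	if (WARN_ON(something_went_very_wrong))
 *		ftrace_dump(DUMP_ALL);	(or DUMP_ORIG for this CPU only)
 *
 * Note that the dump turns tracing off (see tracing_off() above); it has to
 * be re-enabled afterwards, e.g. with "echo 1 > tracing_on".
 */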
10283
10284 #define WRITE_BUFSIZE  4096
10285
10286 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10287                                 size_t count, loff_t *ppos,
10288                                 int (*createfn)(const char *))
10289 {
10290         char *kbuf, *buf, *tmp;
10291         int ret = 0;
10292         size_t done = 0;
10293         size_t size;
10294
10295         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10296         if (!kbuf)
10297                 return -ENOMEM;
10298
10299         while (done < count) {
10300                 size = count - done;
10301
10302                 if (size >= WRITE_BUFSIZE)
10303                         size = WRITE_BUFSIZE - 1;
10304
10305                 if (copy_from_user(kbuf, buffer + done, size)) {
10306                         ret = -EFAULT;
10307                         goto out;
10308                 }
10309                 kbuf[size] = '\0';
10310                 buf = kbuf;
10311                 do {
10312                         tmp = strchr(buf, '\n');
10313                         if (tmp) {
10314                                 *tmp = '\0';
10315                                 size = tmp - buf + 1;
10316                         } else {
10317                                 size = strlen(buf);
10318                                 if (done + size < count) {
10319                                         if (buf != kbuf)
10320                                                 break;
10321                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10322                                         pr_warn("Line length is too long: Should be less than %d\n",
10323                                                 WRITE_BUFSIZE - 2);
10324                                         ret = -EINVAL;
10325                                         goto out;
10326                                 }
10327                         }
10328                         done += size;
10329
10330                         /* Remove comments */
10331                         tmp = strchr(buf, '#');
10332
10333                         if (tmp)
10334                                 *tmp = '\0';
10335
10336                         ret = createfn(buf);
10337                         if (ret)
10338                                 goto out;
10339                         buf += size;
10340
10341                 } while (done < count);
10342         }
10343         ret = done;
10344
10345 out:
10346         kfree(kbuf);
10347
10348         return ret;
10349 }
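
/*
 * A rough sketch of how a tracefs write handler might use
 * trace_parse_run_command(): it splits the user buffer into
 * newline-terminated lines, strips '#' comments, and passes each line to
 * @createfn. The names my_create_cmd/my_events_write are hypothetical.
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		(parse one command line here, return 0 or -errno)
 *		return 0;
 *	}
 *
 *	static ssize_t my_events_write(struct file *file,
 *				       const char __user *buffer,
 *				       size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_cmd);
 *	}
 */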
10350
10351 #ifdef CONFIG_TRACER_MAX_TRACE
10352 __init static bool tr_needs_alloc_snapshot(const char *name)
10353 {
10354         char *test;
10355         int len = strlen(name);
10356         bool ret;
10357
10358         if (!boot_snapshot_index)
10359                 return false;
10360
10361         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10362             boot_snapshot_info[len] == '\t')
10363                 return true;
10364
10365         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10366         if (!test)
10367                 return false;
10368
10369         sprintf(test, "\t%s\t", name);
10370         ret = strstr(boot_snapshot_info, test) != NULL;
10371         kfree(test);
10372         return ret;
10373 }
10374
10375 __init static void do_allocate_snapshot(const char *name)
10376 {
10377         if (!tr_needs_alloc_snapshot(name))
10378                 return;
10379
10380         /*
10381          * When allocate_snapshot is set, the next call to
10382          * allocate_trace_buffers() (called by trace_array_get_by_name())
10383          * will allocate the snapshot buffer. That will also clear
10384          * this flag.
10385          */
10386         allocate_snapshot = true;
10387 }
10388 #else
10389 static inline void do_allocate_snapshot(const char *name) { }
10390 #endif
10391
10392 __init static void enable_instances(void)
10393 {
10394         struct trace_array *tr;
10395         char *curr_str;
10396         char *str;
10397         char *tok;
10398
10399         /* A tab is always appended */
10400         boot_instance_info[boot_instance_index - 1] = '\0';
10401         str = boot_instance_info;
10402
10403         while ((curr_str = strsep(&str, "\t"))) {
10404
10405                 tok = strsep(&curr_str, ",");
10406
10407                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10408                         do_allocate_snapshot(tok);
10409
10410                 tr = trace_array_get_by_name(tok);
10411                 if (!tr) {
10412                         pr_warn("Failed to create instance buffer %s\n", tok);
10413                         continue;
10414                 }
10415                 /* Allow user space to delete it */
10416                 trace_array_put(tr);
10417
10418                 while ((tok = strsep(&curr_str, ","))) {
10419                         early_enable_events(tr, tok, true);
10420                 }
10421         }
10422 }
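
/*
 * boot_instance_info parsed above is filled from the "trace_instance="
 * kernel command line option, where each use appends "<name>[,<event>...]"
 * plus a trailing tab. For example:
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * creates the instance "foo" at boot and enables the two listed events
 * in it.
 */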
10423
10424 __init static int tracer_alloc_buffers(void)
10425 {
10426         int ring_buf_size;
10427         int ret = -ENOMEM;
10428
10429
10430         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10431                 pr_warn("Tracing disabled due to lockdown\n");
10432                 return -EPERM;
10433         }
10434
10435         /*
10436          * Make sure we don't accidentally add more trace options
10437          * than we have bits for.
10438          */
10439         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10440
10441         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10442                 goto out;
10443
10444         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10445                 goto out_free_buffer_mask;
10446
10447         /* Only allocate trace_printk buffers if a trace_printk exists */
10448         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10449                 /* Must be called before global_trace.buffer is allocated */
10450                 trace_printk_init_buffers();
10451
10452         /* To save memory, keep the ring buffer size to its minimum */
10453         if (global_trace.ring_buffer_expanded)
10454                 ring_buf_size = trace_buf_size;
10455         else
10456                 ring_buf_size = 1;
10457
10458         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10459         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10460
10461         raw_spin_lock_init(&global_trace.start_lock);
10462
10463         /*
10464          * The prepare callback allocates some memory for the ring buffer. We
10465          * don't free the buffer if the CPU goes down. If we were to free
10466          * the buffer, then the user would lose any trace that was in the
10467          * buffer. The memory will be removed once the "instance" is removed.
10468          */
10469         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10470                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10471                                       NULL);
10472         if (ret < 0)
10473                 goto out_free_cpumask;
10474         /* Used for event triggers */
10475         ret = -ENOMEM;
10476         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10477         if (!temp_buffer)
10478                 goto out_rm_hp_state;
10479
10480         if (trace_create_savedcmd() < 0)
10481                 goto out_free_temp_buffer;
10482
10483         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10484                 goto out_free_savedcmd;
10485
10486         /* TODO: make the number of buffers hot pluggable with CPUs */
10487         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10488                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10489                 goto out_free_pipe_cpumask;
10490         }
10491         if (global_trace.buffer_disabled)
10492                 tracing_off();
10493
10494         if (trace_boot_clock) {
10495                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10496                 if (ret < 0)
10497                         pr_warn("Trace clock %s not defined, going back to default\n",
10498                                 trace_boot_clock);
10499         }
10500
10501         /*
10502          * register_tracer() might reference current_trace, so it
10503          * needs to be set before we register anything. This is
10504          * just a bootstrap of current_trace anyway.
10505          */
10506         global_trace.current_trace = &nop_trace;
10507
10508         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10509
10510         ftrace_init_global_array_ops(&global_trace);
10511
10512         init_trace_flags_index(&global_trace);
10513
10514         register_tracer(&nop_trace);
10515
10516         /* Function tracing may start here (via kernel command line) */
10517         init_function_trace();
10518
10519         /* All seems OK, enable tracing */
10520         tracing_disabled = 0;
10521
10522         atomic_notifier_chain_register(&panic_notifier_list,
10523                                        &trace_panic_notifier);
10524
10525         register_die_notifier(&trace_die_notifier);
10526
10527         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10528
10529         INIT_LIST_HEAD(&global_trace.systems);
10530         INIT_LIST_HEAD(&global_trace.events);
10531         INIT_LIST_HEAD(&global_trace.hist_vars);
10532         INIT_LIST_HEAD(&global_trace.err_log);
10533         list_add(&global_trace.list, &ftrace_trace_arrays);
10534
10535         apply_trace_boot_options();
10536
10537         register_snapshot_cmd();
10538
10539         test_can_verify();
10540
10541         return 0;
10542
10543 out_free_pipe_cpumask:
10544         free_cpumask_var(global_trace.pipe_cpumask);
10545 out_free_savedcmd:
10546         free_saved_cmdlines_buffer(savedcmd);
10547 out_free_temp_buffer:
10548         ring_buffer_free(temp_buffer);
10549 out_rm_hp_state:
10550         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10551 out_free_cpumask:
10552         free_cpumask_var(global_trace.tracing_cpumask);
10553 out_free_buffer_mask:
10554         free_cpumask_var(tracing_buffer_mask);
10555 out:
10556         return ret;
10557 }
10558
10559 void __init ftrace_boot_snapshot(void)
10560 {
10561 #ifdef CONFIG_TRACER_MAX_TRACE
10562         struct trace_array *tr;
10563
10564         if (!snapshot_at_boot)
10565                 return;
10566
10567         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10568                 if (!tr->allocated_snapshot)
10569                         continue;
10570
10571                 tracing_snapshot_instance(tr);
10572                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10573         }
10574 #endif
10575 }
10576
10577 void __init early_trace_init(void)
10578 {
10579         if (tracepoint_printk) {
10580                 tracepoint_print_iter =
10581                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10582                 if (MEM_FAIL(!tracepoint_print_iter,
10583                              "Failed to allocate trace iterator\n"))
10584                         tracepoint_printk = 0;
10585                 else
10586                         static_key_enable(&tracepoint_printk_key.key);
10587         }
10588         tracer_alloc_buffers();
10589
10590         init_events();
10591 }
10592
10593 void __init trace_init(void)
10594 {
10595         trace_event_init();
10596
10597         if (boot_instance_index)
10598                 enable_instances();
10599 }
10600
10601 __init static void clear_boot_tracer(void)
10602 {
10603         /*
10604          * The default bootup tracer name is stored in init memory.
10605          * This function is called at late init. If we did not
10606          * find the boot tracer, then clear the pointer out, to prevent
10607          * later registration from accessing the memory that is
10608          * about to be freed.
10609          */
10610         if (!default_bootup_tracer)
10611                 return;
10612
10613         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10614                default_bootup_tracer);
10615         default_bootup_tracer = NULL;
10616 }
10617
10618 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10619 __init static void tracing_set_default_clock(void)
10620 {
10621         /* sched_clock_stable() is determined in late_initcall */
10622         if (!trace_boot_clock && !sched_clock_stable()) {
10623                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10624                         pr_warn("Can not set tracing clock due to lockdown\n");
10625                         return;
10626                 }
10627
10628                 printk(KERN_WARNING
10629                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10630                        "If you want to keep using the local clock, then add:\n"
10631                        "  \"trace_clock=local\"\n"
10632                        "on the kernel command line\n");
10633                 tracing_set_clock(&global_trace, "global");
10634         }
10635 }
10636 #else
10637 static inline void tracing_set_default_clock(void) { }
10638 #endif
10639
10640 __init static int late_trace_init(void)
10641 {
10642         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10643                 static_key_disable(&tracepoint_printk_key.key);
10644                 tracepoint_printk = 0;
10645         }
10646
10647         tracing_set_default_clock();
10648         clear_boot_tracer();
10649         return 0;
10650 }
10651
10652 late_initcall_sync(late_trace_init);