ring-buffer: Select IRQ_WORK
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 57e9b28..53582e9 100644
@@ -34,9 +34,27 @@ char event_storage[EVENT_STORAGE_SIZE];
 EXPORT_SYMBOL_GPL(event_storage);
 
 LIST_HEAD(ftrace_events);
-LIST_HEAD(ftrace_common_fields);
+static LIST_HEAD(ftrace_common_fields);
 
-struct list_head *
+#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
+
+static struct kmem_cache *field_cachep;
+static struct kmem_cache *file_cachep;
+
+/* Double loops; break only exits the inner loop, use goto to bail out of both */
+#define do_for_each_event_file(tr, file)                       \
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
+               list_for_each_entry(file, &tr->events, list)
+
+#define do_for_each_event_file_safe(tr, file)                  \
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
+               struct ftrace_event_file *___n;                         \
+               list_for_each_entry_safe(file, ___n, &tr->events, list)
+
+#define while_for_each_event_file()            \
+       }
+
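These helpers iterate every ftrace_event_file of every trace_array; since break only leaves the inner list_for_each_entry(), goto is the only way to exit both loops early. A minimal usage sketch, not part of this patch (the function name is invented), assuming the caller holds event_mutex:

	/* Illustrative only: count enabled event files across all trace arrays */
	static int example_count_enabled_files(void)
	{
		struct trace_array *tr;
		struct ftrace_event_file *file;
		int count = 0;

		do_for_each_event_file(tr, file) {
			if (file->flags & FTRACE_EVENT_FL_ENABLED)
				count++;
		} while_for_each_event_file();

		return count;
	}
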
+static struct list_head *
 trace_get_fields(struct ftrace_event_call *event_call)
 {
        if (!event_call->class->get_fields)
@@ -44,23 +62,45 @@ trace_get_fields(struct ftrace_event_call *event_call)
        return event_call->class->get_fields(event_call);
 }
 
+static struct ftrace_event_field *
+__find_event_field(struct list_head *head, char *name)
+{
+       struct ftrace_event_field *field;
+
+       list_for_each_entry(field, head, link) {
+               if (!strcmp(field->name, name))
+                       return field;
+       }
+
+       return NULL;
+}
+
+struct ftrace_event_field *
+trace_find_event_field(struct ftrace_event_call *call, char *name)
+{
+       struct ftrace_event_field *field;
+       struct list_head *head;
+
+       field = __find_event_field(&ftrace_common_fields, name);
+       if (field)
+               return field;
+
+       head = trace_get_fields(call);
+       return __find_event_field(head, name);
+}
+
 static int __trace_define_field(struct list_head *head, const char *type,
                                const char *name, int offset, int size,
                                int is_signed, int filter_type)
 {
        struct ftrace_event_field *field;
 
-       field = kzalloc(sizeof(*field), GFP_KERNEL);
+       field = kmem_cache_alloc(field_cachep, GFP_TRACE);
        if (!field)
                goto err;
 
-       field->name = kstrdup(name, GFP_KERNEL);
-       if (!field->name)
-               goto err;
-
-       field->type = kstrdup(type, GFP_KERNEL);
-       if (!field->type)
-               goto err;
+       field->name = name;
+       field->type = type;
 
        if (filter_type == FILTER_OTHER)
                field->filter_type = filter_assign_type(type);
@@ -76,9 +116,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
        return 0;
 
 err:
-       if (field)
-               kfree(field->name);
-       kfree(field);
+       kmem_cache_free(field_cachep, field);
 
        return -ENOMEM;
 }
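
With the kstrdup() calls removed, __trace_define_field() now stores the name and type pointers as passed in, so callers must hand over strings that outlive the field; the TRACE_EVENT macros do this with string literals, which is what makes the change safe. A hedged sketch of a caller, with struct my_entry and its field made up for illustration:

	/* Illustrative only: literals keep their storage, so no duplication is needed */
	struct my_entry {
		unsigned long	ip;
	};

	static int example_define_fields(struct ftrace_event_call *call)
	{
		return trace_define_field(call, "unsigned long", "ip",
					  offsetof(struct my_entry, ip),
					  sizeof(unsigned long), 0, FILTER_OTHER);
	}

Correspondingly, trace_destroy_fields() below only returns the field structs to field_cachep and no longer frees the strings.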
@@ -120,7 +158,7 @@ static int trace_define_common_fields(void)
        return ret;
 }
 
-void trace_destroy_fields(struct ftrace_event_call *call)
+static void trace_destroy_fields(struct ftrace_event_call *call)
 {
        struct ftrace_event_field *field, *next;
        struct list_head *head;
@@ -128,9 +166,7 @@ void trace_destroy_fields(struct ftrace_event_call *call)
        head = trace_get_fields(call);
        list_for_each_entry_safe(field, next, head, link) {
                list_del(&field->link);
-               kfree(field->type);
-               kfree(field->name);
-               kfree(field);
+               kmem_cache_free(field_cachep, field);
        }
 }
 
@@ -149,15 +185,17 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init);
 int ftrace_event_reg(struct ftrace_event_call *call,
                     enum trace_reg type, void *data)
 {
+       struct ftrace_event_file *file = data;
+
        switch (type) {
        case TRACE_REG_REGISTER:
                return tracepoint_probe_register(call->name,
                                                 call->class->probe,
-                                                call);
+                                                file);
        case TRACE_REG_UNREGISTER:
                tracepoint_probe_unregister(call->name,
                                            call->class->probe,
-                                           call);
+                                           file);
                return 0;
 
 #ifdef CONFIG_PERF_EVENTS
@@ -183,54 +221,100 @@ EXPORT_SYMBOL_GPL(ftrace_event_reg);
 
 void trace_event_enable_cmd_record(bool enable)
 {
-       struct ftrace_event_call *call;
+       struct ftrace_event_file *file;
+       struct trace_array *tr;
 
        mutex_lock(&event_mutex);
-       list_for_each_entry(call, &ftrace_events, list) {
-               if (!(call->flags & TRACE_EVENT_FL_ENABLED))
+       do_for_each_event_file(tr, file) {
+
+               if (!(file->flags & FTRACE_EVENT_FL_ENABLED))
                        continue;
 
                if (enable) {
                        tracing_start_cmdline_record();
-                       call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+                       set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
                } else {
                        tracing_stop_cmdline_record();
-                       call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+                       clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
                }
-       }
+       } while_for_each_event_file();
        mutex_unlock(&event_mutex);
 }
 
-static int ftrace_event_enable_disable(struct ftrace_event_call *call,
-                                       int enable)
+static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
+                                        int enable, int soft_disable)
 {
+       struct ftrace_event_call *call = file->event_call;
        int ret = 0;
+       int disable;
 
        switch (enable) {
        case 0:
-               if (call->flags & TRACE_EVENT_FL_ENABLED) {
-                       call->flags &= ~TRACE_EVENT_FL_ENABLED;
-                       if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
+               /*
+                * When soft_disable is set and enable is cleared, we want
+                * to clear the SOFT_DISABLED flag but leave the event in the
+                * state that it was. That is, if the event was enabled and
+                * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
+                * is set we do not want the event to be enabled before we
+                * clear the bit.
+                *
+                * When soft_disable is not set but the SOFT_MODE flag is,
+                * we do nothing. Do not disable the tracepoint, otherwise
+                * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
+                */
+               if (soft_disable) {
+                       disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED;
+                       clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
+               } else
+                       disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE);
+
+               if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) {
+                       clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
+                       if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) {
                                tracing_stop_cmdline_record();
-                               call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+                               clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
                        }
-                       call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
+                       call->class->reg(call, TRACE_REG_UNREGISTER, file);
                }
+               /* If in SOFT_MODE, just set the SOFT_DISABLED bit */
+               if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
+                       set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
                break;
        case 1:
-               if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
+               /*
+                * When soft_disable is set and enable is set, we want to
+                * register the tracepoint for the event, but leave the event
+                * as is. That means, if the event was already enabled, we do
+                * nothing (but set SOFT_MODE). If the event is disabled, we
+                * set SOFT_DISABLED before enabling the event tracepoint, so
+                * it still seems to be disabled.
+                */
+               if (!soft_disable)
+                       clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+               else
+                       set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
+
+               if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) {
+
+                       /* Keep the event disabled when going to SOFT_MODE. */
+                       if (soft_disable)
+                               set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+
                        if (trace_flags & TRACE_ITER_RECORD_CMD) {
                                tracing_start_cmdline_record();
-                               call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+                               set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
                        }
-                       ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
+                       ret = call->class->reg(call, TRACE_REG_REGISTER, file);
                        if (ret) {
                                tracing_stop_cmdline_record();
                                pr_info("event trace: Could not enable event "
                                        "%s\n", call->name);
                                break;
                        }
-                       call->flags |= TRACE_EVENT_FL_ENABLED;
+                       set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
+
+                       /* WAS_ENABLED gets set but never cleared. */
+                       call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
                }
                break;
        }
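
Taken together, the soft_disable handling gives the three-step pattern that the enable_event function probes added later in this patch rely on; a condensed sketch, with the caller invented for illustration:

	/* Illustrative only: drive an event through soft mode */
	static int example_soft_control(struct ftrace_event_file *file)
	{
		int ret;

		/* Register the tracepoint, but leave the event soft-disabled */
		ret = __ftrace_event_enable_disable(file, 1, 1);
		if (ret < 0)
			return ret;

		/* From here on, enabling and disabling is just a bit flip */
		clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); /* soft enable */
		set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);   /* soft disable */

		/* Drop SOFT_MODE; the tracepoint is unregistered only if the
		 * event was still soft-disabled.
		 */
		return __ftrace_event_enable_disable(file, 0, 1);
	}
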
@@ -238,13 +322,19 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
        return ret;
 }
 
-static void ftrace_clear_events(void)
+static int ftrace_event_enable_disable(struct ftrace_event_file *file,
+                                      int enable)
 {
-       struct ftrace_event_call *call;
+       return __ftrace_event_enable_disable(file, enable, 0);
+}
+
+static void ftrace_clear_events(struct trace_array *tr)
+{
+       struct ftrace_event_file *file;
 
        mutex_lock(&event_mutex);
-       list_for_each_entry(call, &ftrace_events, list) {
-               ftrace_event_enable_disable(call, 0);
+       list_for_each_entry(file, &tr->events, list) {
+               ftrace_event_enable_disable(file, 0);
        }
        mutex_unlock(&event_mutex);
 }
@@ -257,11 +347,12 @@ static void __put_system(struct event_subsystem *system)
        if (--system->ref_count)
                return;
 
+       list_del(&system->list);
+
        if (filter) {
                kfree(filter->filter_string);
                kfree(filter);
        }
-       kfree(system->name);
        kfree(system);
 }
 
@@ -271,24 +362,45 @@ static void __get_system(struct event_subsystem *system)
        system->ref_count++;
 }
 
-static void put_system(struct event_subsystem *system)
+static void __get_system_dir(struct ftrace_subsystem_dir *dir)
+{
+       WARN_ON_ONCE(dir->ref_count == 0);
+       dir->ref_count++;
+       __get_system(dir->subsystem);
+}
+
+static void __put_system_dir(struct ftrace_subsystem_dir *dir)
+{
+       WARN_ON_ONCE(dir->ref_count == 0);
+       /* If the subsystem is about to be freed, the dir must be too */
+       WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1);
+
+       __put_system(dir->subsystem);
+       if (!--dir->ref_count)
+               kfree(dir);
+}
+
+static void put_system(struct ftrace_subsystem_dir *dir)
 {
        mutex_lock(&event_mutex);
-       __put_system(system);
+       __put_system_dir(dir);
        mutex_unlock(&event_mutex);
 }
 
 /*
  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
  */
-static int __ftrace_set_clr_event(const char *match, const char *sub,
-                                 const char *event, int set)
+static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
+                                 const char *sub, const char *event, int set)
 {
+       struct ftrace_event_file *file;
        struct ftrace_event_call *call;
        int ret = -EINVAL;
 
        mutex_lock(&event_mutex);
-       list_for_each_entry(call, &ftrace_events, list) {
+       list_for_each_entry(file, &tr->events, list) {
+
+               call = file->event_call;
 
                if (!call->name || !call->class || !call->class->reg)
                        continue;
@@ -307,7 +419,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
                if (event && strcmp(event, call->name) != 0)
                        continue;
 
-               ftrace_event_enable_disable(call, set);
+               ftrace_event_enable_disable(file, set);
 
                ret = 0;
        }
@@ -316,7 +428,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
        return ret;
 }
 
-static int ftrace_set_clr_event(char *buf, int set)
+static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 {
        char *event = NULL, *sub = NULL, *match;
 
@@ -344,7 +456,7 @@ static int ftrace_set_clr_event(char *buf, int set)
                        event = NULL;
        }
 
-       return __ftrace_set_clr_event(match, sub, event, set);
+       return __ftrace_set_clr_event(tr, match, sub, event, set);
 }
 
 /**
@@ -361,7 +473,9 @@ static int ftrace_set_clr_event(char *buf, int set)
  */
 int trace_set_clr_event(const char *system, const char *event, int set)
 {
-       return __ftrace_set_clr_event(NULL, system, event, set);
+       struct trace_array *tr = top_trace_array();
+
+       return __ftrace_set_clr_event(tr, NULL, system, event, set);
 }
 EXPORT_SYMBOL_GPL(trace_set_clr_event);
 
@@ -373,6 +487,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
        struct trace_parser parser;
+       struct seq_file *m = file->private_data;
+       struct trace_array *tr = m->private;
        ssize_t read, ret;
 
        if (!cnt)
@@ -395,7 +511,7 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 
                parser.buffer[parser.idx] = 0;
 
-               ret = ftrace_set_clr_event(parser.buffer + !set, set);
+               ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
                if (ret)
                        goto out_put;
        }
@@ -411,17 +527,20 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct ftrace_event_call *call = v;
+       struct ftrace_event_file *file = v;
+       struct ftrace_event_call *call;
+       struct trace_array *tr = m->private;
 
        (*pos)++;
 
-       list_for_each_entry_continue(call, &ftrace_events, list) {
+       list_for_each_entry_continue(file, &tr->events, list) {
+               call = file->event_call;
                /*
                 * The ftrace subsystem is for showing formats only.
                 * They can not be enabled or disabled via the event files.
                 */
                if (call->class && call->class->reg)
-                       return call;
+                       return file;
        }
 
        return NULL;
@@ -429,30 +548,32 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
-       struct ftrace_event_call *call;
+       struct ftrace_event_file *file;
+       struct trace_array *tr = m->private;
        loff_t l;
 
        mutex_lock(&event_mutex);
 
-       call = list_entry(&ftrace_events, struct ftrace_event_call, list);
+       file = list_entry(&tr->events, struct ftrace_event_file, list);
        for (l = 0; l <= *pos; ) {
-               call = t_next(m, call, &l);
-               if (!call)
+               file = t_next(m, file, &l);
+               if (!file)
                        break;
        }
-       return call;
+       return file;
 }
 
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct ftrace_event_call *call = v;
+       struct ftrace_event_file *file = v;
+       struct trace_array *tr = m->private;
 
        (*pos)++;
 
-       list_for_each_entry_continue(call, &ftrace_events, list) {
-               if (call->flags & TRACE_EVENT_FL_ENABLED)
-                       return call;
+       list_for_each_entry_continue(file, &tr->events, list) {
+               if (file->flags & FTRACE_EVENT_FL_ENABLED)
+                       return file;
        }
 
        return NULL;
@@ -460,23 +581,25 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
-       struct ftrace_event_call *call;
+       struct ftrace_event_file *file;
+       struct trace_array *tr = m->private;
        loff_t l;
 
        mutex_lock(&event_mutex);
 
-       call = list_entry(&ftrace_events, struct ftrace_event_call, list);
+       file = list_entry(&tr->events, struct ftrace_event_file, list);
        for (l = 0; l <= *pos; ) {
-               call = s_next(m, call, &l);
-               if (!call)
+               file = s_next(m, file, &l);
+               if (!file)
                        break;
        }
-       return call;
+       return file;
 }
 
 static int t_show(struct seq_file *m, void *v)
 {
-       struct ftrace_event_call *call = v;
+       struct ftrace_event_file *file = v;
+       struct ftrace_event_call *call = file->event_call;
 
        if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
                seq_printf(m, "%s:", call->class->system);
@@ -494,25 +617,31 @@ static ssize_t
 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
 {
-       struct ftrace_event_call *call = filp->private_data;
+       struct ftrace_event_file *file = filp->private_data;
        char *buf;
 
-       if (call->flags & TRACE_EVENT_FL_ENABLED)
-               buf = "1\n";
-       else
+       if (file->flags & FTRACE_EVENT_FL_ENABLED) {
+               if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
+                       buf = "0*\n";
+               else
+                       buf = "1\n";
+       } else
                buf = "0\n";
 
-       return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
 }
 
 static ssize_t
 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
-       struct ftrace_event_call *call = filp->private_data;
+       struct ftrace_event_file *file = filp->private_data;
        unsigned long val;
        int ret;
 
+       if (!file)
+               return -EINVAL;
+
        ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
        if (ret)
                return ret;
@@ -525,7 +654,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
        case 0:
        case 1:
                mutex_lock(&event_mutex);
-               ret = ftrace_event_enable_disable(call, val);
+               ret = ftrace_event_enable_disable(file, val);
                mutex_unlock(&event_mutex);
                break;
 
@@ -543,14 +672,18 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
        const char set_to_char[4] = { '?', '0', '1', 'X' };
-       struct event_subsystem *system = filp->private_data;
+       struct ftrace_subsystem_dir *dir = filp->private_data;
+       struct event_subsystem *system = dir->subsystem;
        struct ftrace_event_call *call;
+       struct ftrace_event_file *file;
+       struct trace_array *tr = dir->tr;
        char buf[2];
        int set = 0;
        int ret;
 
        mutex_lock(&event_mutex);
-       list_for_each_entry(call, &ftrace_events, list) {
+       list_for_each_entry(file, &tr->events, list) {
+               call = file->event_call;
                if (!call->name || !call->class || !call->class->reg)
                        continue;
 
@@ -562,7 +695,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                 * or if all events or cleared, or if we have
                 * a mixture.
                 */
-               set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
+               set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED));
 
                /*
                 * If we have a mixture, no need to look further.
@@ -584,7 +717,8 @@ static ssize_t
 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                    loff_t *ppos)
 {
-       struct event_subsystem *system = filp->private_data;
+       struct ftrace_subsystem_dir *dir = filp->private_data;
+       struct event_subsystem *system = dir->subsystem;
        const char *name = NULL;
        unsigned long val;
        ssize_t ret;
@@ -607,7 +741,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
        if (system)
                name = system->name;
 
-       ret = __ftrace_set_clr_event(NULL, name, NULL, val);
+       ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
        if (ret)
                goto out;
 
@@ -845,43 +979,75 @@ static LIST_HEAD(event_subsystems);
 static int subsystem_open(struct inode *inode, struct file *filp)
 {
        struct event_subsystem *system = NULL;
+       struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */
+       struct trace_array *tr;
        int ret;
 
-       if (!inode->i_private)
-               goto skip_search;
-
        /* Make sure the system still exists */
        mutex_lock(&event_mutex);
-       list_for_each_entry(system, &event_subsystems, list) {
-               if (system == inode->i_private) {
-                       /* Don't open systems with no events */
-                       if (!system->nr_events) {
-                               system = NULL;
-                               break;
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               list_for_each_entry(dir, &tr->systems, list) {
+                       if (dir == inode->i_private) {
+                               /* Don't open systems with no events */
+                               if (dir->nr_events) {
+                                       __get_system_dir(dir);
+                                       system = dir->subsystem;
+                               }
+                               goto exit_loop;
                        }
-                       __get_system(system);
-                       break;
                }
        }
+ exit_loop:
        mutex_unlock(&event_mutex);
 
-       if (system != inode->i_private)
+       if (!system)
                return -ENODEV;
 
- skip_search:
+       /* Some versions of gcc think dir can be uninitialized here */
+       WARN_ON(!dir);
+
+       ret = tracing_open_generic(inode, filp);
+       if (ret < 0)
+               put_system(dir);
+
+       return ret;
+}
+
+static int system_tr_open(struct inode *inode, struct file *filp)
+{
+       struct ftrace_subsystem_dir *dir;
+       struct trace_array *tr = inode->i_private;
+       int ret;
+
+       /* Make a temporary dir that has no system but points to tr */
+       dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+       if (!dir)
+               return -ENOMEM;
+
+       dir->tr = tr;
+
        ret = tracing_open_generic(inode, filp);
-       if (ret < 0 && system)
-               put_system(system);
+       if (ret < 0)
+               kfree(dir);
+
+       filp->private_data = dir;
 
        return ret;
 }
 
 static int subsystem_release(struct inode *inode, struct file *file)
 {
-       struct event_subsystem *system = inode->i_private;
+       struct ftrace_subsystem_dir *dir = file->private_data;
 
-       if (system)
-               put_system(system);
+       /*
+        * If dir->subsystem is NULL, then this is a temporary
+        * descriptor that was made for a trace_array to enable
+        * all subsystems.
+        */
+       if (dir->subsystem)
+               put_system(dir);
+       else
+               kfree(dir);
 
        return 0;
 }
@@ -890,7 +1056,8 @@ static ssize_t
 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                      loff_t *ppos)
 {
-       struct event_subsystem *system = filp->private_data;
+       struct ftrace_subsystem_dir *dir = filp->private_data;
+       struct event_subsystem *system = dir->subsystem;
        struct trace_seq *s;
        int r;
 
@@ -915,7 +1082,7 @@ static ssize_t
 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
 {
-       struct event_subsystem *system = filp->private_data;
+       struct ftrace_subsystem_dir *dir = filp->private_data;
        char *buf;
        int err;
 
@@ -932,7 +1099,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
        }
        buf[cnt] = '\0';
 
-       err = apply_subsystem_event_filter(system, buf);
+       err = apply_subsystem_event_filter(dir, buf);
        free_page((unsigned long) buf);
        if (err < 0)
                return err;
@@ -1041,30 +1208,35 @@ static const struct file_operations ftrace_system_enable_fops = {
        .release = subsystem_release,
 };
 
+static const struct file_operations ftrace_tr_enable_fops = {
+       .open = system_tr_open,
+       .read = system_enable_read,
+       .write = system_enable_write,
+       .llseek = default_llseek,
+       .release = subsystem_release,
+};
+
 static const struct file_operations ftrace_show_header_fops = {
        .open = tracing_open_generic,
        .read = show_header,
        .llseek = default_llseek,
 };
 
-static struct dentry *event_trace_events_dir(void)
+static int
+ftrace_event_open(struct inode *inode, struct file *file,
+                 const struct seq_operations *seq_ops)
 {
-       static struct dentry *d_tracer;
-       static struct dentry *d_events;
-
-       if (d_events)
-               return d_events;
-
-       d_tracer = tracing_init_dentry();
-       if (!d_tracer)
-               return NULL;
+       struct seq_file *m;
+       int ret;
 
-       d_events = debugfs_create_dir("events", d_tracer);
-       if (!d_events)
-               pr_warning("Could not create debugfs "
-                          "'events' directory\n");
+       ret = seq_open(file, seq_ops);
+       if (ret < 0)
+               return ret;
+       m = file->private_data;
+       /* copy tr over to seq ops */
+       m->private = inode->i_private;
 
-       return d_events;
+       return ret;
 }
 
 static int
@@ -1072,117 +1244,165 @@ ftrace_event_avail_open(struct inode *inode, struct file *file)
 {
        const struct seq_operations *seq_ops = &show_event_seq_ops;
 
-       return seq_open(file, seq_ops);
+       return ftrace_event_open(inode, file, seq_ops);
 }
 
 static int
 ftrace_event_set_open(struct inode *inode, struct file *file)
 {
        const struct seq_operations *seq_ops = &show_set_event_seq_ops;
+       struct trace_array *tr = inode->i_private;
 
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
-               ftrace_clear_events();
+               ftrace_clear_events(tr);
+
+       return ftrace_event_open(inode, file, seq_ops);
+}
+
+static struct event_subsystem *
+create_new_subsystem(const char *name)
+{
+       struct event_subsystem *system;
+
+       /* need to create new entry */
+       system = kmalloc(sizeof(*system), GFP_KERNEL);
+       if (!system)
+               return NULL;
+
+       system->ref_count = 1;
+       system->name = name;
+
+       system->filter = NULL;
+
+       system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+       if (!system->filter)
+               goto out_free;
+
+       list_add(&system->list, &event_subsystems);
+
+       return system;
 
-       return seq_open(file, seq_ops);
+ out_free:
+       kfree(system);
+       return NULL;
 }
 
 static struct dentry *
-event_subsystem_dir(const char *name, struct dentry *d_events)
+event_subsystem_dir(struct trace_array *tr, const char *name,
+                   struct ftrace_event_file *file, struct dentry *parent)
 {
+       struct ftrace_subsystem_dir *dir;
        struct event_subsystem *system;
        struct dentry *entry;
 
        /* First see if we did not already create this dir */
-       list_for_each_entry(system, &event_subsystems, list) {
+       list_for_each_entry(dir, &tr->systems, list) {
+               system = dir->subsystem;
                if (strcmp(system->name, name) == 0) {
-                       system->nr_events++;
-                       return system->entry;
+                       dir->nr_events++;
+                       file->system = dir;
+                       return dir->entry;
                }
        }
 
-       /* need to create new entry */
-       system = kmalloc(sizeof(*system), GFP_KERNEL);
-       if (!system) {
-               pr_warning("No memory to create event subsystem %s\n",
-                          name);
-               return d_events;
+       /* Now see if the system itself exists. */
+       list_for_each_entry(system, &event_subsystems, list) {
+               if (strcmp(system->name, name) == 0)
+                       break;
        }
+       /* Reset system variable when not found */
+       if (&system->list == &event_subsystems)
+               system = NULL;
 
-       system->entry = debugfs_create_dir(name, d_events);
-       if (!system->entry) {
-               pr_warning("Could not create event subsystem %s\n",
-                          name);
-               kfree(system);
-               return d_events;
-       }
+       dir = kmalloc(sizeof(*dir), GFP_KERNEL);
+       if (!dir)
+               goto out_fail;
 
-       system->nr_events = 1;
-       system->ref_count = 1;
-       system->name = kstrdup(name, GFP_KERNEL);
-       if (!system->name) {
-               debugfs_remove(system->entry);
-               kfree(system);
-               return d_events;
+       if (!system) {
+               system = create_new_subsystem(name);
+               if (!system)
+                       goto out_free;
+       } else
+               __get_system(system);
+
+       dir->entry = debugfs_create_dir(name, parent);
+       if (!dir->entry) {
+               pr_warning("Failed to create system directory %s\n", name);
+               __put_system(system);
+               goto out_free;
        }
 
-       list_add(&system->list, &event_subsystems);
-
-       system->filter = NULL;
-
-       system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
-       if (!system->filter) {
-               pr_warning("Could not allocate filter for subsystem "
-                          "'%s'\n", name);
-               return system->entry;
-       }
+       dir->tr = tr;
+       dir->ref_count = 1;
+       dir->nr_events = 1;
+       dir->subsystem = system;
+       file->system = dir;
 
-       entry = debugfs_create_file("filter", 0644, system->entry, system,
+       entry = debugfs_create_file("filter", 0644, dir->entry, dir,
                                    &ftrace_subsystem_filter_fops);
        if (!entry) {
                kfree(system->filter);
                system->filter = NULL;
-               pr_warning("Could not create debugfs "
-                          "'%s/filter' entry\n", name);
+               pr_warning("Could not create debugfs '%s/filter' entry\n", name);
        }
 
-       trace_create_file("enable", 0644, system->entry, system,
+       trace_create_file("enable", 0644, dir->entry, dir,
                          &ftrace_system_enable_fops);
 
-       return system->entry;
+       list_add(&dir->list, &tr->systems);
+
+       return dir->entry;
+
+ out_free:
+       kfree(dir);
+ out_fail:
+       /* Only print this message if a memory allocation failed */
+       if (!dir || !system)
+               pr_warning("No memory to create event subsystem %s\n",
+                          name);
+       return NULL;
 }
 
 static int
-event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
+event_create_dir(struct dentry *parent,
+                struct ftrace_event_file *file,
                 const struct file_operations *id,
                 const struct file_operations *enable,
                 const struct file_operations *filter,
                 const struct file_operations *format)
 {
+       struct ftrace_event_call *call = file->event_call;
+       struct trace_array *tr = file->tr;
        struct list_head *head;
+       struct dentry *d_events;
        int ret;
 
        /*
         * If the trace point header did not define TRACE_SYSTEM
         * then the system would be called "TRACE_SYSTEM".
         */
-       if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
-               d_events = event_subsystem_dir(call->class->system, d_events);
-
-       call->dir = debugfs_create_dir(call->name, d_events);
-       if (!call->dir) {
-               pr_warning("Could not create debugfs "
-                          "'%s' directory\n", call->name);
+       if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
+               d_events = event_subsystem_dir(tr, call->class->system, file, parent);
+               if (!d_events)
+                       return -ENOMEM;
+       } else
+               d_events = parent;
+
+       file->dir = debugfs_create_dir(call->name, d_events);
+       if (!file->dir) {
+               pr_warning("Could not create debugfs '%s' directory\n",
+                          call->name);
                return -1;
        }
 
        if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
-               trace_create_file("enable", 0644, call->dir, call,
+               trace_create_file("enable", 0644, file->dir, file,
                                  enable);
 
 #ifdef CONFIG_PERF_EVENTS
        if (call->event.type && call->class->reg)
-               trace_create_file("id", 0444, call->dir, call,
+               trace_create_file("id", 0444, file->dir, call,
                                  id);
 #endif
 
@@ -1196,23 +1416,76 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                if (ret < 0) {
                        pr_warning("Could not initialize trace point"
                                   " events/%s\n", call->name);
-                       return ret;
+                       return -1;
                }
        }
-       trace_create_file("filter", 0644, call->dir, call,
+       trace_create_file("filter", 0644, file->dir, call,
                          filter);
 
-       trace_create_file("format", 0444, call->dir, call,
+       trace_create_file("format", 0444, file->dir, call,
                          format);
 
        return 0;
 }
 
+static void remove_subsystem(struct ftrace_subsystem_dir *dir)
+{
+       if (!dir)
+               return;
+
+       if (!--dir->nr_events) {
+               debugfs_remove_recursive(dir->entry);
+               list_del(&dir->list);
+               __put_system_dir(dir);
+       }
+}
+
+static void remove_event_from_tracers(struct ftrace_event_call *call)
+{
+       struct ftrace_event_file *file;
+       struct trace_array *tr;
+
+       do_for_each_event_file_safe(tr, file) {
+
+               if (file->event_call != call)
+                       continue;
+
+               list_del(&file->list);
+               debugfs_remove_recursive(file->dir);
+               remove_subsystem(file->system);
+               kmem_cache_free(file_cachep, file);
+
+               /*
+                * The do_for_each_event_file_safe() is
+                * a double loop. After finding the call for this
+                * trace_array, we use break to jump to the next
+                * trace_array.
+                */
+               break;
+       } while_for_each_event_file();
+}
+
 static void event_remove(struct ftrace_event_call *call)
 {
-       ftrace_event_enable_disable(call, 0);
+       struct trace_array *tr;
+       struct ftrace_event_file *file;
+
+       do_for_each_event_file(tr, file) {
+               if (file->event_call != call)
+                       continue;
+               ftrace_event_enable_disable(file, 0);
+               /*
+                * The do_for_each_event_file() is
+                * a double loop. After finding the call for this
+                * trace_array, we use break to jump to the next
+                * trace_array.
+                */
+               break;
+       } while_for_each_event_file();
+
        if (call->event.funcs)
                __unregister_ftrace_event(&call->event);
+       remove_event_from_tracers(call);
        list_del(&call->list);
 }
 
@@ -1234,82 +1507,99 @@ static int event_init(struct ftrace_event_call *call)
 }
 
 static int
-__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
-                      const struct file_operations *id,
-                      const struct file_operations *enable,
-                      const struct file_operations *filter,
-                      const struct file_operations *format)
+__register_event(struct ftrace_event_call *call, struct module *mod)
 {
-       struct dentry *d_events;
        int ret;
 
        ret = event_init(call);
        if (ret < 0)
                return ret;
 
-       d_events = event_trace_events_dir();
-       if (!d_events)
-               return -ENOENT;
-
-       ret = event_create_dir(call, d_events, id, enable, filter, format);
-       if (!ret)
-               list_add(&call->list, &ftrace_events);
+       list_add(&call->list, &ftrace_events);
        call->mod = mod;
 
-       return ret;
+       return 0;
+}
+
+/* Add an event to a trace directory */
+static int
+__trace_add_new_event(struct ftrace_event_call *call,
+                     struct trace_array *tr,
+                     const struct file_operations *id,
+                     const struct file_operations *enable,
+                     const struct file_operations *filter,
+                     const struct file_operations *format)
+{
+       struct ftrace_event_file *file;
+
+       file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+       if (!file)
+               return -ENOMEM;
+
+       file->event_call = call;
+       file->tr = tr;
+       list_add(&file->list, &tr->events);
+
+       return event_create_dir(tr->event_dir, file, id, enable, filter, format);
+}
+
+/*
+ * Just create a descriptor for early init. A descriptor is required
+ * for enabling events at boot. We want to enable events before
+ * the filesystem is initialized.
+ */
+static __init int
+__trace_early_add_new_event(struct ftrace_event_call *call,
+                           struct trace_array *tr)
+{
+       struct ftrace_event_file *file;
+
+       file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+       if (!file)
+               return -ENOMEM;
+
+       file->event_call = call;
+       file->tr = tr;
+       list_add(&file->list, &tr->events);
+
+       return 0;
 }
 
+struct ftrace_module_file_ops;
+static void __add_event_to_tracers(struct ftrace_event_call *call,
+                                  struct ftrace_module_file_ops *file_ops);
+
 /* Add an additional event_call dynamically */
 int trace_add_event_call(struct ftrace_event_call *call)
 {
        int ret;
        mutex_lock(&event_mutex);
-       ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
-                                    &ftrace_enable_fops,
-                                    &ftrace_event_filter_fops,
-                                    &ftrace_event_format_fops);
-       mutex_unlock(&event_mutex);
-       return ret;
-}
 
-static void remove_subsystem_dir(const char *name)
-{
-       struct event_subsystem *system;
-
-       if (strcmp(name, TRACE_SYSTEM) == 0)
-               return;
+       ret = __register_event(call, NULL);
+       if (ret >= 0)
+               __add_event_to_tracers(call, NULL);
 
-       list_for_each_entry(system, &event_subsystems, list) {
-               if (strcmp(system->name, name) == 0) {
-                       if (!--system->nr_events) {
-                               debugfs_remove_recursive(system->entry);
-                               list_del(&system->list);
-                               __put_system(system);
-                       }
-                       break;
-               }
-       }
+       mutex_unlock(&event_mutex);
+       return ret;
 }
 
 /*
- * Must be called under locking both of event_mutex and trace_event_mutex.
+ * Must be called under locking both of event_mutex and trace_event_sem.
  */
 static void __trace_remove_event_call(struct ftrace_event_call *call)
 {
        event_remove(call);
        trace_destroy_fields(call);
        destroy_preds(call);
-       debugfs_remove_recursive(call->dir);
-       remove_subsystem_dir(call->class->system);
 }
 
 /* Remove an event_call */
 void trace_remove_event_call(struct ftrace_event_call *call)
 {
        mutex_lock(&event_mutex);
-       down_write(&trace_event_mutex);
+       down_write(&trace_event_sem);
        __trace_remove_event_call(call);
-       up_write(&trace_event_mutex);
+       up_write(&trace_event_sem);
        mutex_unlock(&event_mutex);
 }
 
@@ -1336,6 +1626,26 @@ struct ftrace_module_file_ops {
 };
 
 static struct ftrace_module_file_ops *
+find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
+{
+       /*
+        * As event_calls are added in groups by module,
+        * when we find one file_ops, we don't need to search for
+        * each call in that module, as the rest should be the
+        * same. Only search for a new one if the last one did
+        * not match.
+        */
+       if (file_ops && mod == file_ops->mod)
+               return file_ops;
+
+       list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
+               if (file_ops->mod == mod)
+                       return file_ops;
+       }
+       return NULL;
+}
+
+static struct ftrace_module_file_ops *
 trace_create_file_ops(struct module *mod)
 {
        struct ftrace_module_file_ops *file_ops;
@@ -1386,9 +1696,8 @@ static void trace_module_add_events(struct module *mod)
                return;
 
        for_each_event(call, start, end) {
-               __trace_add_event_call(*call, mod,
-                                      &file_ops->id, &file_ops->enable,
-                                      &file_ops->filter, &file_ops->format);
+               __register_event(*call, mod);
+               __add_event_to_tracers(*call, file_ops);
        }
 }
 
@@ -1396,12 +1705,13 @@ static void trace_module_remove_events(struct module *mod)
 {
        struct ftrace_module_file_ops *file_ops;
        struct ftrace_event_call *call, *p;
-       bool found = false;
+       bool clear_trace = false;
 
-       down_write(&trace_event_mutex);
+       down_write(&trace_event_sem);
        list_for_each_entry_safe(call, p, &ftrace_events, list) {
                if (call->mod == mod) {
-                       found = true;
+                       if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
+                               clear_trace = true;
                        __trace_remove_event_call(call);
                }
        }
@@ -1415,14 +1725,18 @@ static void trace_module_remove_events(struct module *mod)
                list_del(&file_ops->list);
                kfree(file_ops);
        }
+       up_write(&trace_event_sem);
 
        /*
         * It is safest to reset the ring buffer if the module being unloaded
-        * registered any events.
+        * registered any events that were used. The only worry is if
+        * a new module gets loaded, and takes on the same id as the events
+        * of this module. When printing out the buffer, traced events left
+        * over from this module may be passed to the new module events and
+        * unexpected results may occur.
         */
-       if (found)
-               tracing_reset_current_online_cpus();
-       up_write(&trace_event_mutex);
+       if (clear_trace)
+               tracing_reset_all_online_cpus();
 }
 
 static int trace_module_notify(struct notifier_block *self,
@@ -1443,36 +1757,575 @@ static int trace_module_notify(struct notifier_block *self,
 
        return 0;
 }
+
+static int
+__trace_add_new_mod_event(struct ftrace_event_call *call,
+                         struct trace_array *tr,
+                         struct ftrace_module_file_ops *file_ops)
+{
+       return __trace_add_new_event(call, tr,
+                                    &file_ops->id, &file_ops->enable,
+                                    &file_ops->filter, &file_ops->format);
+}
+
 #else
-static int trace_module_notify(struct notifier_block *self,
-                              unsigned long val, void *data)
+static inline struct ftrace_module_file_ops *
+find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
+{
+       return NULL;
+}
+static inline int trace_module_notify(struct notifier_block *self,
+                                     unsigned long val, void *data)
 {
        return 0;
 }
+static inline int
+__trace_add_new_mod_event(struct ftrace_event_call *call,
+                         struct trace_array *tr,
+                         struct ftrace_module_file_ops *file_ops)
+{
+       return -ENODEV;
+}
 #endif /* CONFIG_MODULES */
 
-static struct notifier_block trace_module_nb = {
-       .notifier_call = trace_module_notify,
-       .priority = 0,
-};
-
-extern struct ftrace_event_call *__start_ftrace_events[];
-extern struct ftrace_event_call *__stop_ftrace_events[];
-
-static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
-
-static __init int setup_trace_event(char *str)
+/* Create a new event directory structure for a trace directory. */
+static void
+__trace_add_event_dirs(struct trace_array *tr)
 {
-       strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
-       ring_buffer_expanded = 1;
-       tracing_selftest_disabled = 1;
+       struct ftrace_module_file_ops *file_ops = NULL;
+       struct ftrace_event_call *call;
+       int ret;
+
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (call->mod) {
+                       /*
+                        * Directories for events by modules need to
+                        * keep module ref counts when opened (as we don't
+                        * want the module to disappear when reading one
+                        * of these files). The file_ops keep account of
+                        * the module ref count.
+                        */
+                       file_ops = find_ftrace_file_ops(file_ops, call->mod);
+                       if (!file_ops)
+                               continue; /* Warn? */
+                       ret = __trace_add_new_mod_event(call, tr, file_ops);
+                       if (ret < 0)
+                               pr_warning("Could not create directory for event %s\n",
+                                          call->name);
+                       continue;
+               }
+               ret = __trace_add_new_event(call, tr,
+                                           &ftrace_event_id_fops,
+                                           &ftrace_enable_fops,
+                                           &ftrace_event_filter_fops,
+                                           &ftrace_event_format_fops);
+               if (ret < 0)
+                       pr_warning("Could not create directory for event %s\n",
+                                  call->name);
+       }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+/* Avoid typos */
+#define ENABLE_EVENT_STR       "enable_event"
+#define DISABLE_EVENT_STR      "disable_event"
+
+struct event_probe_data {
+       struct ftrace_event_file        *file;
+       unsigned long                   count;
+       int                             ref;
+       bool                            enable;
+};
+
+static struct ftrace_event_file *
+find_event_file(struct trace_array *tr, const char *system,  const char *event)
+{
+       struct ftrace_event_file *file;
+       struct ftrace_event_call *call;
+
+       list_for_each_entry(file, &tr->events, list) {
+
+               call = file->event_call;
+
+               if (!call->name || !call->class || !call->class->reg)
+                       continue;
+
+               if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
+                       continue;
+
+               if (strcmp(event, call->name) == 0 &&
+                   strcmp(system, call->class->system) == 0)
+                       return file;
+       }
+       return NULL;
+}
+
+static void
+event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
+{
+       struct event_probe_data **pdata = (struct event_probe_data **)_data;
+       struct event_probe_data *data = *pdata;
+
+       if (!data)
+               return;
+
+       if (data->enable)
+               clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
+       else
+               set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
+}
+
+static void
+event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
+{
+       struct event_probe_data **pdata = (struct event_probe_data **)_data;
+       struct event_probe_data *data = *pdata;
+
+       if (!data)
+               return;
+
+       if (!data->count)
+               return;
+
+       /* Skip if the event is in a state we want to switch to */
+       if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+               return;
+
+       if (data->count != -1)
+               (data->count)--;
+
+       event_enable_probe(ip, parent_ip, _data);
+}
+
+static int
+event_enable_print(struct seq_file *m, unsigned long ip,
+                     struct ftrace_probe_ops *ops, void *_data)
+{
+       struct event_probe_data *data = _data;
+
+       seq_printf(m, "%ps:", (void *)ip);
+
+       seq_printf(m, "%s:%s:%s",
+                  data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
+                  data->file->event_call->class->system,
+                  data->file->event_call->name);
+
+       if (data->count == -1)
+               seq_printf(m, ":unlimited\n");
+       else
+               seq_printf(m, ":count=%ld\n", data->count);
+
+       return 0;
+}
+
+static int
+event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
+                 void **_data)
+{
+       struct event_probe_data **pdata = (struct event_probe_data **)_data;
+       struct event_probe_data *data = *pdata;
+
+       data->ref++;
+       return 0;
+}
+
+static void
+event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
+                 void **_data)
+{
+       struct event_probe_data **pdata = (struct event_probe_data **)_data;
+       struct event_probe_data *data = *pdata;
+
+       if (WARN_ON_ONCE(data->ref <= 0))
+               return;
+
+       data->ref--;
+       if (!data->ref) {
+               /* Remove the SOFT_MODE flag */
+               __ftrace_event_enable_disable(data->file, 0, 1);
+               module_put(data->file->event_call->mod);
+               kfree(data);
+       }
+       *pdata = NULL;
+}
+
+static struct ftrace_probe_ops event_enable_probe_ops = {
+       .func                   = event_enable_probe,
+       .print                  = event_enable_print,
+       .init                   = event_enable_init,
+       .free                   = event_enable_free,
+};
+
+static struct ftrace_probe_ops event_enable_count_probe_ops = {
+       .func                   = event_enable_count_probe,
+       .print                  = event_enable_print,
+       .init                   = event_enable_init,
+       .free                   = event_enable_free,
+};
+
+static struct ftrace_probe_ops event_disable_probe_ops = {
+       .func                   = event_enable_probe,
+       .print                  = event_enable_print,
+       .init                   = event_enable_init,
+       .free                   = event_enable_free,
+};
+
+static struct ftrace_probe_ops event_disable_count_probe_ops = {
+       .func                   = event_enable_count_probe,
+       .print                  = event_enable_print,
+       .init                   = event_enable_init,
+       .free                   = event_enable_free,
+};
+
+static int
+event_enable_func(struct ftrace_hash *hash,
+                 char *glob, char *cmd, char *param, int enabled)
+{
+       struct trace_array *tr = top_trace_array();
+       struct ftrace_event_file *file;
+       struct ftrace_probe_ops *ops;
+       struct event_probe_data *data;
+       const char *system;
+       const char *event;
+       char *number;
+       bool enable;
+       int ret;
+
+       /* hash funcs only work with set_ftrace_filter */
+       if (!enabled)
+               return -EINVAL;
+
+       if (!param)
+               return -EINVAL;
+
+       system = strsep(&param, ":");
+       if (!param)
+               return -EINVAL;
+
+       event = strsep(&param, ":");
+
+       mutex_lock(&event_mutex);
+
+       ret = -EINVAL;
+       file = find_event_file(tr, system, event);
+       if (!file)
+               goto out;
+
+       enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
+
+       if (enable)
+               ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
+       else
+               ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
+
+       if (glob[0] == '!') {
+               unregister_ftrace_function_probe_func(glob+1, ops);
+               ret = 0;
+               goto out;
+       }
+
+       ret = -ENOMEM;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto out;
+
+       data->enable = enable;
+       data->count = -1;
+       data->file = file;
+
+       if (!param)
+               goto out_reg;
+
+       number = strsep(&param, ":");
+
+       ret = -EINVAL;
+       if (!strlen(number))
+               goto out_free;
+
+       /*
+        * We use the callback data field (which is a pointer)
+        * as our counter.
+        */
+       ret = kstrtoul(number, 0, &data->count);
+       if (ret)
+               goto out_free;
+
+ out_reg:
+       /* Don't let event modules unload while probe registered */
+       ret = try_module_get(file->event_call->mod);
+       if (!ret)
+               goto out_free;
+
+       ret = __ftrace_event_enable_disable(file, 1, 1);
+       if (ret < 0)
+               goto out_put;
+       ret = register_ftrace_function_probe(glob, ops, data);
+       if (!ret)
+               goto out_disable;
+ out:
+       mutex_unlock(&event_mutex);
+       return ret;
+
+ out_disable:
+       __ftrace_event_enable_disable(file, 0, 1);
+ out_put:
+       module_put(file->event_call->mod);
+ out_free:
+       kfree(data);
+       goto out;
+}
+
+static struct ftrace_func_command event_enable_cmd = {
+       .name                   = ENABLE_EVENT_STR,
+       .func                   = event_enable_func,
+};
+
+static struct ftrace_func_command event_disable_cmd = {
+       .name                   = DISABLE_EVENT_STR,
+       .func                   = event_enable_func,
+};
+
+static __init int register_event_cmds(void)
+{
+       int ret;
+
+       ret = register_ftrace_command(&event_enable_cmd);
+       if (WARN_ON(ret < 0))
+               return ret;
+       ret = register_ftrace_command(&event_disable_cmd);
+       if (WARN_ON(ret < 0))
+               unregister_ftrace_command(&event_enable_cmd);
+       return ret;
+}
+#else
+static inline int register_event_cmds(void) { return 0; }
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
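Once event_enable_cmd and event_disable_cmd are registered, the usual way to exercise them (following the ftrace function-trigger syntax; the function and event here are only examples) is to write a trigger such as

	echo 'schedule:enable_event:sched:sched_switch:1' > set_ftrace_filter

in the tracing directory, which puts sched_switch into soft mode and soft-enables it the first time schedule() is hit (the trailing :1 limits how many times the trigger fires); prefixing the glob with '!' removes the trigger again.
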
+/*
+ * The top level array has already had its ftrace_event_file
+ * descriptors created in order to allow for early events to
+ * be recorded. This function is called after the debugfs has been
+ * initialized, and we now have to create the files associated
+ * with the events.
+ */
+static __init void
+__trace_early_add_event_dirs(struct trace_array *tr)
+{
+       struct ftrace_event_file *file;
+       int ret;
+
+       list_for_each_entry(file, &tr->events, list) {
+               ret = event_create_dir(tr->event_dir, file,
+                                      &ftrace_event_id_fops,
+                                      &ftrace_enable_fops,
+                                      &ftrace_event_filter_fops,
+                                      &ftrace_event_format_fops);
+               if (ret < 0)
+                       pr_warning("Could not create directory for event %s\n",
+                                  file->event_call->name);
+       }
+}
+
+/*
+ * For early boot up, the top trace array needs to have
+ * a list of events that can be enabled. This must be done before
+ * the filesystem is set up in order to allow events to be traced
+ * early.
+ */
+static __init void
+__trace_early_add_events(struct trace_array *tr)
+{
+       struct ftrace_event_call *call;
+       int ret;
+
+       list_for_each_entry(call, &ftrace_events, list) {
+               /* Early boot up should not have any modules loaded */
+               if (WARN_ON_ONCE(call->mod))
+                       continue;
+
+               ret = __trace_early_add_new_event(call, tr);
+               if (ret < 0)
+                       pr_warning("Could not create early event %s\n",
+                                  call->name);
+       }
+}
+
+/* Remove the event directory structure for a trace directory. */
+static void
+__trace_remove_event_dirs(struct trace_array *tr)
+{
+       struct ftrace_event_file *file, *next;
+
+       list_for_each_entry_safe(file, next, &tr->events, list) {
+               list_del(&file->list);
+               debugfs_remove_recursive(file->dir);
+               remove_subsystem(file->system);
+               kmem_cache_free(file_cachep, file);
+       }
+}
+
+static void
+__add_event_to_tracers(struct ftrace_event_call *call,
+                      struct ftrace_module_file_ops *file_ops)
+{
+       struct trace_array *tr;
+
+       list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+               if (file_ops)
+                       __trace_add_new_mod_event(call, tr, file_ops);
+               else
+                       __trace_add_new_event(call, tr,
+                                             &ftrace_event_id_fops,
+                                             &ftrace_enable_fops,
+                                             &ftrace_event_filter_fops,
+                                             &ftrace_event_format_fops);
+       }
+}
+
+static struct notifier_block trace_module_nb = {
+       .notifier_call = trace_module_notify,
+       .priority = 0,
+};
+
+extern struct ftrace_event_call *__start_ftrace_events[];
+extern struct ftrace_event_call *__stop_ftrace_events[];
+
+static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
+
+static __init int setup_trace_event(char *str)
+{
+       strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
+       ring_buffer_expanded = true;
+       tracing_selftest_disabled = true;
 
        return 1;
 }
 __setup("trace_event=", setup_trace_event);
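+
+/*
+ * For reference, a command-line sketch of the option set up above (the
+ * event names are only examples): listed events are enabled at
+ * core_initcall time by event_trace_enable(), before the debugfs files
+ * exist.
+ *
+ *   trace_event=sched:sched_switch,irq:irq_handler_entry
+ */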
 
+/* Expects to have event_mutex held when called */
+static int
+create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
+{
+       struct dentry *d_events;
+       struct dentry *entry;
+
+       entry = debugfs_create_file("set_event", 0644, parent,
+                                   tr, &ftrace_set_event_fops);
+       if (!entry) {
+               pr_warning("Could not create debugfs 'set_event' entry\n");
+               return -ENOMEM;
+       }
+
+       d_events = debugfs_create_dir("events", parent);
+       if (!d_events) {
+               pr_warning("Could not create debugfs 'events' directory\n");
+               return -ENOMEM;
+       }
+
+       /* ring buffer internal formats */
+       trace_create_file("header_page", 0444, d_events,
+                         ring_buffer_print_page_header,
+                         &ftrace_show_header_fops);
+
+       trace_create_file("header_event", 0444, d_events,
+                         ring_buffer_print_entry_header,
+                         &ftrace_show_header_fops);
+
+       trace_create_file("enable", 0644, d_events,
+                         tr, &ftrace_tr_enable_fops);
+
+       tr->event_dir = d_events;
+
+       return 0;
+}
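+
+/*
+ * Roughly, the layout created above for a trace array's directory is:
+ *
+ *   <parent>/set_event
+ *   <parent>/events/enable
+ *   <parent>/events/header_page
+ *   <parent>/events/header_event
+ *
+ * The per-system and per-event subdirectories under events/ are filled
+ * in afterwards by __trace_add_event_dirs() or
+ * __trace_early_add_event_dirs().
+ */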
+
+/**
+ * event_trace_add_tracer - add an instance of a trace_array to events
+ * @parent: The parent dentry to place the files/directories for events in
+ * @tr: The trace array associated with these events
+ *
+ * When a new instance is created, it needs to set up its events
+ * directory, as well as other files associated with events. It also
+ * creates the event hierarchy in the @parent/events directory.
+ *
+ * Returns 0 on success.
+ */
+int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
+{
+       int ret;
+
+       mutex_lock(&event_mutex);
+
+       ret = create_event_toplevel_files(parent, tr);
+       if (ret)
+               goto out_unlock;
+
+       down_write(&trace_event_sem);
+       __trace_add_event_dirs(tr);
+       up_write(&trace_event_sem);
+
+ out_unlock:
+       mutex_unlock(&event_mutex);
+
+       return ret;
+}
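+
+/*
+ * A usage sketch, assuming debugfs is mounted at /sys/kernel/debug and
+ * the instance name is only an example: creating a new tracing instance
+ *
+ *   mkdir /sys/kernel/debug/tracing/instances/foo
+ *
+ * ends up calling event_trace_add_tracer() with the new instance's
+ * directory as @parent, which populates instances/foo/set_event and
+ * instances/foo/events/.
+ */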
+
+/*
+ * The top trace array already had its ftrace_event_file descriptors
+ * created at early boot. Now the debugfs files themselves need to be
+ * created.
+ */
+static __init int
+early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
+{
+       int ret;
+
+       mutex_lock(&event_mutex);
+
+       ret = create_event_toplevel_files(parent, tr);
+       if (ret)
+               goto out_unlock;
+
+       down_write(&trace_event_sem);
+       __trace_early_add_event_dirs(tr);
+       up_write(&trace_event_sem);
+
+ out_unlock:
+       mutex_unlock(&event_mutex);
+
+       return ret;
+}
+
+int event_trace_del_tracer(struct trace_array *tr)
+{
+       /* Disable any running events */
+       __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
+
+       mutex_lock(&event_mutex);
+
+       down_write(&trace_event_sem);
+       __trace_remove_event_dirs(tr);
+       debugfs_remove_recursive(tr->event_dir);
+       up_write(&trace_event_sem);
+
+       tr->event_dir = NULL;
+
+       mutex_unlock(&event_mutex);
+
+       return 0;
+}
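+
+/*
+ * The teardown counterpart of the sketch above: removing the instance
+ * directory, e.g.
+ *
+ *   rmdir /sys/kernel/debug/tracing/instances/foo
+ *
+ * first disables the instance's events and removes its event files via
+ * event_trace_del_tracer() before the trace_array itself goes away.
+ */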
+
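+/*
+ * SLAB_PANIC makes the kernel panic if either cache cannot be created,
+ * so no error checking is needed here. This runs as an early_initcall
+ * (see the bottom of the file), so the caches exist before any event
+ * fields or files are allocated from them.
+ */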
+static __init int event_trace_memsetup(void)
+{
+       field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
+       file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC);
+       return 0;
+}
+
 static __init int event_trace_enable(void)
 {
+       struct trace_array *tr = top_trace_array();
        struct ftrace_event_call **iter, *call;
        char *buf = bootup_event_buf;
        char *token;
@@ -1486,6 +2339,14 @@ static __init int event_trace_enable(void)
                        list_add(&call->list, &ftrace_events);
        }
 
+       /*
+        * We need the top trace array to have a working set of trace
+        * points at early init, before the debug files and directories
+        * are created. Create the file entries now, and attach them
+        * to the actual file dentries later.
+        */
+       __trace_early_add_events(tr);
+
        while (true) {
                token = strsep(&buf, ",");
 
@@ -1494,73 +2355,43 @@ static __init int event_trace_enable(void)
                if (!*token)
                        continue;
 
-               ret = ftrace_set_clr_event(token, 1);
+               ret = ftrace_set_clr_event(tr, token, 1);
                if (ret)
                        pr_warn("Failed to enable trace event: %s\n", token);
        }
 
        trace_printk_start_comm();
 
+       register_event_cmds();
+
        return 0;
 }
 
 static __init int event_trace_init(void)
 {
-       struct ftrace_event_call *call;
+       struct trace_array *tr;
        struct dentry *d_tracer;
        struct dentry *entry;
-       struct dentry *d_events;
        int ret;
 
+       tr = top_trace_array();
+
        d_tracer = tracing_init_dentry();
        if (!d_tracer)
                return 0;
 
        entry = debugfs_create_file("available_events", 0444, d_tracer,
-                                   NULL, &ftrace_avail_fops);
+                                   tr, &ftrace_avail_fops);
        if (!entry)
                pr_warning("Could not create debugfs "
                           "'available_events' entry\n");
 
-       entry = debugfs_create_file("set_event", 0644, d_tracer,
-                                   NULL, &ftrace_set_event_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_event' entry\n");
-
-       d_events = event_trace_events_dir();
-       if (!d_events)
-               return 0;
-
-       /* ring buffer internal formats */
-       trace_create_file("header_page", 0444, d_events,
-                         ring_buffer_print_page_header,
-                         &ftrace_show_header_fops);
-
-       trace_create_file("header_event", 0444, d_events,
-                         ring_buffer_print_entry_header,
-                         &ftrace_show_header_fops);
-
-       trace_create_file("enable", 0644, d_events,
-                         NULL, &ftrace_system_enable_fops);
-
        if (trace_define_common_fields())
                pr_warning("tracing: Failed to allocate common fields");
 
-       /*
-        * Early initialization already enabled ftrace event.
-        * Now it's only necessary to create the event directory.
-        */
-       list_for_each_entry(call, &ftrace_events, list) {
-
-               ret = event_create_dir(call, d_events,
-                                      &ftrace_event_id_fops,
-                                      &ftrace_enable_fops,
-                                      &ftrace_event_filter_fops,
-                                      &ftrace_event_format_fops);
-               if (ret < 0)
-                       event_remove(call);
-       }
+       ret = early_event_add_tracer(d_tracer, tr);
+       if (ret)
+               return ret;
 
        ret = register_module_notifier(&trace_module_nb);
        if (ret)
@@ -1568,6 +2399,7 @@ static __init int event_trace_init(void)
 
        return 0;
 }
+early_initcall(event_trace_memsetup);
 core_initcall(event_trace_enable);
 fs_initcall(event_trace_init);
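+
+/*
+ * Initcall ordering, for reference: event_trace_memsetup() (early) sets
+ * up the slab caches, event_trace_enable() (core) registers the built-in
+ * events and handles the trace_event= boot option, and event_trace_init()
+ * (fs) creates the debugfs files once the tracing directory exists.
+ */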
 
@@ -1627,13 +2459,20 @@ static __init void event_test_stuff(void)
  */
 static __init void event_trace_self_tests(void)
 {
+       struct ftrace_subsystem_dir *dir;
+       struct ftrace_event_file *file;
        struct ftrace_event_call *call;
        struct event_subsystem *system;
+       struct trace_array *tr;
        int ret;
 
+       tr = top_trace_array();
+
        pr_info("Running tests on trace events:\n");
 
-       list_for_each_entry(call, &ftrace_events, list) {
+       list_for_each_entry(file, &tr->events, list) {
+
+               call = file->event_call;
 
                /* Only test those that have a probe */
                if (!call->class || !call->class->probe)
@@ -1657,15 +2496,15 @@ static __init void event_trace_self_tests(void)
                 * If an event is already enabled, someone is using
                 * it and the self test should not be on.
                 */
-               if (call->flags & TRACE_EVENT_FL_ENABLED) {
+               if (file->flags & FTRACE_EVENT_FL_ENABLED) {
                        pr_warning("Enabled event during self test!\n");
                        WARN_ON_ONCE(1);
                        continue;
                }
 
-               ftrace_event_enable_disable(call, 1);
+               ftrace_event_enable_disable(file, 1);
                event_test_stuff();
-               ftrace_event_enable_disable(call, 0);
+               ftrace_event_enable_disable(file, 0);
 
                pr_cont("OK\n");
        }
@@ -1674,7 +2513,9 @@ static __init void event_trace_self_tests(void)
 
        pr_info("Running tests on trace event systems:\n");
 
-       list_for_each_entry(system, &event_subsystems, list) {
+       list_for_each_entry(dir, &tr->systems, list) {
+
+               system = dir->subsystem;
 
                /* the ftrace system is special, skip it */
                if (strcmp(system->name, "ftrace") == 0)
@@ -1682,7 +2523,7 @@ static __init void event_trace_self_tests(void)
 
                pr_info("Testing event system %s: ", system->name);
 
-               ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
+               ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
                if (WARN_ON_ONCE(ret)) {
                        pr_warning("error enabling system %s\n",
                                   system->name);
@@ -1691,7 +2532,7 @@ static __init void event_trace_self_tests(void)
 
                event_test_stuff();
 
-               ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
+               ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
                if (WARN_ON_ONCE(ret)) {
                        pr_warning("error disabling system %s\n",
                                   system->name);
@@ -1706,7 +2547,7 @@ static __init void event_trace_self_tests(void)
        pr_info("Running tests on all trace events:\n");
        pr_info("Testing all events: ");
 
-       ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
+       ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
        if (WARN_ON_ONCE(ret)) {
                pr_warning("error enabling all events\n");
                return;
@@ -1715,7 +2556,7 @@ static __init void event_trace_self_tests(void)
        event_test_stuff();
 
        /* reset sysname */
-       ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
+       ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
        if (WARN_ON_ONCE(ret)) {
                pr_warning("error disabling all events\n");
                return;