From: Robert Richter Date: Fri, 15 Oct 2010 10:45:00 +0000 (+0200) Subject: Merge remote branch 'tip/perf/core' into oprofile/core X-Git-Tag: v2.6.37-rc1~214^2~18^2~3 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=6268464b370e234e0255330190f9bd5d19386ad7;p=~shefty%2Frdma-dev.git Merge remote branch 'tip/perf/core' into oprofile/core Conflicts: arch/arm/oprofile/common.c kernel/perf_event.c --- 6268464b370e234e0255330190f9bd5d19386ad7 diff --cc drivers/oprofile/oprofile_perf.c index b17235a24a4,00000000000..79c0005134a mode 100644,000000..100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@@ -1,323 -1,0 +1,323 @@@ +/* + * Copyright 2010 ARM Ltd. + * + * Perf-events backend for OProfile. + */ +#include +#include +#include + +/* + * Per performance monitor configuration as set via oprofilefs. The unsigned + * long fields are the values exported per counter by + * oprofile_perf_create_files(); attr is the perf attribute that + * op_perf_setup() fills in from event and count. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long unit_mask; + unsigned long kernel; + unsigned long user; + struct perf_event_attr attr; +}; + +static int oprofile_perf_enabled; +static DEFINE_MUTEX(oprofile_perf_mutex); + +static struct op_counter_config *counter_config; +static struct perf_event **perf_events[nr_cpumask_bits]; +static int num_counters; + +/* + * Overflow callback for oprofile. Searches the perf_events[] slots of the + * current CPU for the overflowing event: a match is recorded with + * oprofile_add_sample() under its counter index, anything else is reported + * as a spurious overflow and ignored. + */ +static void op_overflow_handler(struct perf_event *event, int unused, + struct perf_sample_data *data, struct pt_regs *regs) +{ + int id; + u32 cpu = smp_processor_id(); + + for (id = 0; id < num_counters; ++id) + if (perf_events[cpu][id] == event) + break; + + if (id != num_counters) + oprofile_add_sample(regs, id); + else + pr_warning("oprofile: ignoring spurious overflow " + "on cpu %u\n", cpu); +} + +/* + * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile + * settings in counter_config. Attributes are created as `pinned' events and + * so are permanently scheduled on the PMU. 
+ */ +static void op_perf_setup(void) +{ + int i; + u32 size = sizeof(struct perf_event_attr); + struct perf_event_attr *attr; + + for (i = 0; i < num_counters; ++i) { + attr = &counter_config[i].attr; + memset(attr, 0, size); + attr->type = PERF_TYPE_RAW; + attr->size = size; + attr->config = counter_config[i].event; + attr->sample_period = counter_config[i].count; + attr->pinned = 1; + } +} +/* + * Create the perf event for counter "event" on "cpu" and store it in + * perf_events[cpu][event]. Returns 0 without doing anything when the counter + * is not enabled or the slot is already populated, the PTR_ERR() value when + * creation fails, and -EBUSY (after releasing the event) when the new event + * is not in PERF_EVENT_STATE_ACTIVE. + */ +static int op_create_counter(int cpu, int event) +{ + struct perf_event *pevent; + + if (!counter_config[event].enabled || perf_events[cpu][event]) + return 0; + + pevent = perf_event_create_kernel_counter(&counter_config[event].attr, - cpu, -1, ++ cpu, NULL, + op_overflow_handler); + + if (IS_ERR(pevent)) + return PTR_ERR(pevent); + + if (pevent->state != PERF_EVENT_STATE_ACTIVE) { + perf_event_release_kernel(pevent); + pr_warning("oprofile: failed to enable event %d " + "on CPU %d\n", event, cpu); + return -EBUSY; + } + + perf_events[cpu][event] = pevent; + + return 0; +} +/* + * Release the perf event stored in perf_events[cpu][event], if there is + * one, and reset the slot to NULL. + */ +static void op_destroy_counter(int cpu, int event) +{ + struct perf_event *pevent = perf_events[cpu][event]; + + if (pevent) { + perf_event_release_kernel(pevent); + perf_events[cpu][event] = NULL; + } +} + +/* + * Called by oprofile_perf_start and oprofile_perf_resume to create active + * perf events based on the previously configured attributes. Walks every + * online CPU and every counter, and returns the first non-zero result of + * op_create_counter(); counters created before that failure are not + * destroyed here. + */ +static int op_perf_start(void) +{ + int cpu, event, ret = 0; + + for_each_online_cpu(cpu) { + for (event = 0; event < num_counters; ++event) { + ret = op_create_counter(cpu, event); + if (ret) + return ret; + } + } + + return ret; +} + +/* + * Called by oprofile_perf_stop at the end of a profiling run. 
+ */ +static void op_perf_stop(void) +{ + int cpu, event; + + for_each_online_cpu(cpu) + for (event = 0; event < num_counters; ++event) + op_destroy_counter(cpu, event); +} +/* + * Populate oprofilefs with one directory per counter, named after the + * counter index, holding the enabled, event, count, unit_mask, kernel and + * user files backed by the matching counter_config[] fields. The results of + * the oprofilefs helpers are not checked and 0 is always returned. + * NOTE(review): buf holds at most three digits plus the terminator, so + * snprintf() would truncate directory names for indices above 999. + */ +static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root) +{ + unsigned int i; + + for (i = 0; i < num_counters; i++) { + struct dentry *dir; + char buf[4]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + } + + return 0; +} +/* + * Rebuild the perf attributes from counter_config with oprofilefs_lock + * held. Always returns 0. + */ +static int oprofile_perf_setup(void) +{ + spin_lock(&oprofilefs_lock); + op_perf_setup(); + spin_unlock(&oprofilefs_lock); + return 0; +} +/* + * Create the counters unless profiling is already enabled, in which case + * -EBUSY is returned. + * NOTE(review): the result of op_perf_start() is discarded, so a failure to + * create some counters still returns 0 and marks profiling as enabled. + */ +static int oprofile_perf_start(void) +{ + int ret = -EBUSY; + + mutex_lock(&oprofile_perf_mutex); + if (!oprofile_perf_enabled) { + ret = 0; + op_perf_start(); + oprofile_perf_enabled = 1; + } + mutex_unlock(&oprofile_perf_mutex); + return ret; +} +/* + * Destroy the counters if profiling is enabled and clear the enabled flag, + * all under oprofile_perf_mutex. Installed as both the stop and the shutdown + * operation by oprofile_perf_init(). + */ +static void oprofile_perf_stop(void) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled) + op_perf_stop(); + oprofile_perf_enabled = 0; + mutex_unlock(&oprofile_perf_mutex); +} +/* + * Power management hooks. Suspend destroys the counters of a running + * profile but leaves oprofile_perf_enabled untouched; resume uses that flag + * to decide whether the counters have to be recreated. + */ +#ifdef CONFIG_PM +static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled) + op_perf_stop(); + mutex_unlock(&oprofile_perf_mutex); + return 0; +} +/* + * Recreate the counters if profiling was enabled at suspend time. If + * op_perf_start() fails the enabled flag is cleared. Always returns 0. + */ +static int oprofile_perf_resume(struct platform_device *dev) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled && op_perf_start()) + oprofile_perf_enabled = 0; + mutex_unlock(&oprofile_perf_mutex); + return 0; +} + +static struct platform_driver 
oprofile_driver = { + .driver = { + .name = "oprofile-perf", + }, + .resume = oprofile_perf_resume, + .suspend = oprofile_perf_suspend, +}; + +static struct platform_device *oprofile_pdev; +/* + * Register the platform driver carrying the suspend/resume hooks and a + * platform device with the same name. The driver is unregistered again if + * the device cannot be registered. Returns 0 or the registration error. + */ +static int __init init_driverfs(void) +{ + int ret; + + ret = platform_driver_register(&oprofile_driver); + if (ret) + return ret; + + oprofile_pdev = platform_device_register_simple( + oprofile_driver.driver.name, 0, NULL, 0); + if (IS_ERR(oprofile_pdev)) { + ret = PTR_ERR(oprofile_pdev); + platform_driver_unregister(&oprofile_driver); + } + + return ret; +} + +static void __exit exit_driverfs(void) +{ + platform_device_unregister(oprofile_pdev); + platform_driver_unregister(&oprofile_driver); +} +#else +static int __init init_driverfs(void) { return 0; } +#define exit_driverfs() do { } while (0) +#endif /* CONFIG_PM */ +/* + * Release any perf events still held for every possible CPU, free the + * per-CPU event arrays and counter_config, then call exit_driverfs(). Also + * serves as the error path of oprofile_perf_init(). The freed pointers are + * not reset here. + * NOTE(review): with CONFIG_PM, exit_driverfs() is marked __exit although it + * is called from this function, which is reachable from __init code - + * confirm the section annotations are intended. + */ +void oprofile_perf_exit(void) +{ + int cpu, id; + struct perf_event *event; + + for_each_possible_cpu(cpu) { + for (id = 0; id < num_counters; ++id) { + event = perf_events[cpu][id]; + if (event) + perf_event_release_kernel(event); + } + + kfree(perf_events[cpu]); + } + + kfree(counter_config); + exit_driverfs(); +} +/* + * Set up the perf backend: register the PM driver, query the number of + * counters, allocate counter_config and one event array per possible CPU, + * then fill in the oprofile_operations callbacks and cpu_type. + * + * Returns 0 on success, the init_driverfs() error, -ENODEV when there are no + * counters or no cpu_type name, or -ENOMEM on allocation failure. Every + * failure after init_driverfs() is unwound through oprofile_perf_exit(). + */ +int __init oprofile_perf_init(struct oprofile_operations *ops) +{ + int cpu, ret = 0; + + ret = init_driverfs(); + if (ret) + return ret; + + memset(&perf_events, 0, sizeof(perf_events)); + + num_counters = perf_num_counters(); + if (num_counters <= 0) { + pr_info("oprofile: no performance counters\n"); + ret = -ENODEV; + goto out; + } + + counter_config = kcalloc(num_counters, + sizeof(struct op_counter_config), GFP_KERNEL); + + if (!counter_config) { + pr_info("oprofile: failed to allocate %d " + "counters\n", num_counters); + ret = -ENOMEM; + num_counters = 0; + goto out; + } + + for_each_possible_cpu(cpu) { + perf_events[cpu] = kcalloc(num_counters, + sizeof(struct perf_event *), GFP_KERNEL); + if (!perf_events[cpu]) { + pr_info("oprofile: failed to allocate %d perf events " + "for cpu %d\n", num_counters, cpu); + ret = -ENOMEM; + goto 
out; + } + } + + ops->create_files = oprofile_perf_create_files; + ops->setup = oprofile_perf_setup; + ops->start = oprofile_perf_start; + ops->stop = oprofile_perf_stop; + ops->shutdown = oprofile_perf_stop; + ops->cpu_type = op_name_from_perf_id(); + + if (!ops->cpu_type) + ret = -ENODEV; + else + pr_info("oprofile: using %s\n", ops->cpu_type); + +out: + if (ret) + oprofile_perf_exit(); + + return ret; +} diff --cc include/linux/perf_event.h index 33f08dafda2,61b1e2d760f..a9227e98520 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@@ -842,18 -887,11 +887,13 @@@ struct perf_output_handle #ifdef CONFIG_PERF_EVENTS - /* - * Set by architecture code: - */ - extern int perf_max_events; - - extern const struct pmu *hw_perf_event_init(struct perf_event *event); + extern int perf_pmu_register(struct pmu *pmu); + extern void perf_pmu_unregister(struct pmu *pmu); +extern int perf_num_counters(void); +extern const char *perf_pmu_name(void); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); - extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); diff --cc kernel/perf_event.c index fc512684423,64507eaa2d9..1ec3916ffef --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@@ -67,41 -61,38 +61,43 @@@ int sysctl_perf_event_sample_rate __rea static atomic64_t perf_event_id; - /* - * Lock for (sysadmin-configurable) event reservations: - */ - static DEFINE_SPINLOCK(perf_resource_lock); - - /* - * Architecture provided APIs - weak aliases: - */ - extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) - { - return NULL; - } - - void __weak hw_perf_disable(void) { barrier(); } - void __weak hw_perf_enable(void) { barrier(); } - void __weak 
perf_event_print_debug(void) { } +extern __weak const char *perf_pmu_name(void) +{ + return "pmu"; +} + - static DEFINE_PER_CPU(int, perf_disable_count); + void perf_pmu_disable(struct pmu *pmu) + { + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!(*count)++) + pmu->pmu_disable(pmu); + } - void perf_disable(void) + void perf_pmu_enable(struct pmu *pmu) { - if (!__get_cpu_var(perf_disable_count)++) - hw_perf_disable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!--(*count)) + pmu->pmu_enable(pmu); } - void perf_enable(void) + static DEFINE_PER_CPU(struct list_head, rotation_list); + + /* + * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized + * because they're strictly cpu affine and rotate_start is called with IRQs + * disabled, while rotate_context is called from IRQ context. + */ + static void perf_pmu_rotate_start(struct pmu *pmu) { - if (!--__get_cpu_var(perf_disable_count)) - hw_perf_enable(); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + struct list_head *head = &__get_cpu_var(rotation_list); + + WARN_ON(!irqs_disabled()); + + if (list_empty(&cpuctx->rotation_list)) + list_add(&cpuctx->rotation_list, head); } static void get_ctx(struct perf_event_context *ctx)