1 diff --git a/Documentation/ABI/testing/sysfs-perfmon b/Documentation/ABI/testing/sysfs-perfmon
5 +++ b/Documentation/ABI/testing/sysfs-perfmon
7 +What: /sys/kernel/perfmon
10 +Contact: eranian@gmail.com
12 +Description: provide the configuration interface for the perfmon2 subsystem.
13 + The tree contains information about the detected hardware, current
14 + state of the subsystem as well as some configuration parameters.
16 + The tree consists of the following entries:
18 + /sys/kernel/perfmon/debug (read-write):
20 + Enable perfmon2 debugging output via klogd. Debug messages produced during
21 + PMU interrupt handling are not controlled by this entry. The traces are rate-limited
22 + to avoid flooding of the console. It is possible to change the throttling
23 + via /proc/sys/kernel/printk_ratelimit. The value is interpreted as a bitmask.
24 + Each bit enables a particular type of debug messages. Refer to the file
25 + include/linux/perfmon_kern.h for more information
27 + /sys/kernel/perfmon/pmc_max_fast_arg (read-only):
29 + Number of perfmon2 syscall arguments copied directly onto the
30 + stack (copy_from_user) for pfm_write_pmcs(). Copying to the stack avoids
31 + having to allocate a buffer. The unit is the number of pfarg_pmc_t
34 + /sys/kernel/perfmon/pmd_max_fast_arg (read-only):
36 + Number of perfmon2 syscall arguments copied directly onto the
37 + stack (copy_from_user) for pfm_write_pmds()/pfm_read_pmds(). Copying
38 + to the stack avoids having to allocate a buffer. The unit is the number
39 + of pfarg_pmd_t structures.
42 + /sys/kernel/perfmon/reset_stats (write-only):
44 + Reset the statistics collected by perfmon2. Stats are available
45 + per-cpu via debugfs.
47 + /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only):
49 + Reports the amount of memory currently dedicated to sampling
50 + buffers by the kernel. The unit is bytes.
52 + /sys/kernel/perfmon/smpl_buffer_mem_max (read-write):
54 + Maximum amount of kernel memory usable for sampling buffers. -1 means
55 + everything that is available. Unit is bytes.
57 + /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only):
59 + Current utilization of kernel memory in bytes.
61 + /sys/kernel/perfmon/sys_group (read-write):
63 + User group allowed to create a system-wide perfmon2 context (session).
64 + -1 means any group. This control will be kept until we find a package
65 + able to control capabilities via PAM.
67 + /sys/kernel/perfmon/task_group (read-write):
69 + User group allowed to create a per-thread context (session).
70 + -1 means any group. This control will be kept until we find a
71 + package able to control capabilities via PAM.
73 + /sys/kernel/perfmon/sys_sessions_count (read-only):
75 + Number of system-wide contexts currently attached to CPUs.
77 + /sys/kernel/perfmon/task_sessions_count (read-only):
79 + Number of per-thread contexts currently attached to threads.
81 + /sys/kernel/perfmon/version (read-only):
83 + Perfmon2 interface revision number.
85 + /sys/kernel/perfmon/arg_mem_max (read-write):
87 + Maximum size of vector arguments expressed in bytes. Can be modified
89 + /sys/kernel/perfmon/mode (read-write):
91 + Bitmask to enable/disable certain perfmon2 features.
93 + - bit 0: if set, then reserved bitfields are ignored on PMC writes
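+
+ All entries are plain text files, so standard file I/O is enough to use
+ them. A minimal sketch, assuming sufficient permissions (the bitmask value
+ written to debug is only an example):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		char buf[64];
+		FILE *f = fopen("/sys/kernel/perfmon/version", "r");
+
+		if (f && fgets(buf, sizeof(buf), f))
+			printf("perfmon2 version: %s", buf);
+		if (f)
+			fclose(f);
+
+		/* enable some debug message classes (see perfmon_kern.h for the bits) */
+		f = fopen("/sys/kernel/perfmon/debug", "w");
+		if (!f)
+			return 1;
+		fprintf(f, "%d\n", 0xffff);	/* example bitmask */
+		fclose(f);
+		return 0;
+	}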
94 diff --git a/Documentation/ABI/testing/sysfs-perfmon-fmt b/Documentation/ABI/testing/sysfs-perfmon-fmt
96 index 0000000..1b45270
98 +++ b/Documentation/ABI/testing/sysfs-perfmon-fmt
100 +What: /sys/kernel/perfmon/formats
102 +KernelVersion: 2.6.24
103 +Contact: eranian@gmail.com
105 +Description: provide a description of the available perfmon2 custom sampling buffer formats
106 + which are implemented as independent kernel modules. Each format gets
107 + a subdir with a few entries.
109 + The name of the subdir is the name of the sampling format. The same name
110 + must be passed to pfm_create_context() to use the format.
112 + Each subdir XX contains the following entries:
114 + /sys/kernel/perfmon/formats/XX/version (read-only):
116 + Version number of the format in clear text and null terminated.
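+
+ A tool can therefore discover the available formats at run time by listing
+ this directory. A minimal sketch:
+
+	#include <stdio.h>
+	#include <dirent.h>
+
+	int main(void)
+	{
+		DIR *d = opendir("/sys/kernel/perfmon/formats");
+		struct dirent *e;
+
+		if (!d)
+			return 1;
+		while ((e = readdir(d)) != NULL) {
+			if (e->d_name[0] == '.')
+				continue;
+			/* each name can be passed to pfm_create_context() */
+			printf("%s\n", e->d_name);
+		}
+		closedir(d);
+		return 0;
+	}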
118 diff --git a/Documentation/ABI/testing/sysfs-perfmon-pmu b/Documentation/ABI/testing/sysfs-perfmon-pmu
120 index 0000000..a1afc7e
122 +++ b/Documentation/ABI/testing/sysfs-perfmon-pmu
124 +What: /sys/kernel/perfmon/pmu
126 +KernelVersion: 2.6.24
127 +Contact: eranian@gmail.com
129 +Description: provide information about the currently loaded PMU description module.
130 + The module contains the mapping of the actual performance counter registers
131 + onto the logical PMU exposed by perfmon. There is at most one PMU description
132 + module loaded at any time.
134 + The sysfs PMU tree provides a description of the mapping for each register.
135 + There is one subdir per config and data register, along with an entry for the
136 + name of the PMU model.
138 + The model entry is as follows:
140 + /sys/kernel/perfmon/pmu_desc/model (read-only):
142 + Name of the PMU model in clear text and zero terminated.
144 + Then each logical PMU register, XX, gets a subtree with the following entries:
146 + /sys/kernel/perfmon/pmu_desc/pm*XX/addr (read-only):
148 + The physical address or index of the actual underlying hardware register.
149 + On Itanium, it corresponds to the index. On an X86 processor, this is
150 + the actual MSR address.
152 + /sys/kernel/perfmon/pmu_desc/pm*XX/dfl_val (read-only):
154 + The default value of the register in hexadecimal.
156 + /sys/kernel/perfmon/pmu_desc/pm*XX/name (read-only):
158 + The name of the hardware register.
160 + /sys/kernel/perfmon/pmu_desc/pm*XX/rsvd_msk (read-only):
162 + The bitmask of reserved bits, i.e., bits which cannot be changed by
163 + applications. When a bit is set, it means the corresponding bit in the
164 + actual register is reserved.
166 + /sys/kernel/perfmon/pmu_desc/pm*XX/width (read-only):
168 + The width in bits of the register. This field is only relevant for counter
170 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
171 index 1150444..2652b6c 100644
172 --- a/Documentation/kernel-parameters.txt
173 +++ b/Documentation/kernel-parameters.txt
174 @@ -1643,6 +1643,9 @@ and is between 256 and 4096 characters. It is defined in the file
176 See arch/parisc/kernel/pdc_chassis.c
178 + perfmon_debug [PERFMON] Enables Perfmon debug messages. Needed
179 + to see traces of the early startup phase.
182 See Documentation/paride.txt.
184 diff --git a/Documentation/perfmon2-debugfs.txt b/Documentation/perfmon2-debugfs.txt
186 index 0000000..b30cae8
188 +++ b/Documentation/perfmon2-debugfs.txt
190 + The perfmon2 debug and statistics interface
191 + ------------------------------------------
193 + <eranian@gmail.com>
195 +The perfmon2 interface exports a set of statistics which are used to tune and
196 +debug the implementation. The data is composed of a set of very simple metrics,
197 +mostly aggregated counts and durations. They instrument key points in the
198 +perfmon2 code, such as context switch and interrupt handling.
200 +The data is accessible via the debug filesystem (debugfs). Thus you need to
201 +have the filesystem support enabled in your kernel. Furthermore, since 2.6.25,
202 +the perfmon2 statistics interface is an optional component. It needs to be
203 +explicitly enabled in the kernel config file (CONFIG_PERFMON_DEBUG_FS).
205 +To access the data, the debugfs filesystem must be mounted. Supposing the mount
206 +point is /debugfs, you would need to do:
207 + $ mount -t debugfs none /debugfs
209 +The data is located under the perfmon subdirectory and is organized per CPU.
210 +For each CPU, the same set of metrics is available, one metric per file in
213 +The metrics are as follows:
215 + ctxswin_count (read-only):
217 + Number of PMU context switch-ins.
219 + ctxswin_ns (read-only):
221 + Number of nanoseconds spent in the PMU context switch-in
222 + routine. Dividing this number by the value of ctxswin_count
223 + yields the average cost of a PMU context switch-in.
225 + ctxswout_count (read-only):
227 + Number of PMU context switch-outs.
229 + ctxswout_ns (read-only):
231 + Number of nanoseconds spent in the PMU context switch-out
232 + routine. Dividing this number by the value of ctxswout_count
233 + yields the average cost of a PMU context switch-out.
235 + fmt_handler_calls (read-only):
237 + Number of calls to the sampling format routine that handles
238 + PMU interrupts, i.e., typically the routine that records a
241 + fmt_handler_ns (read-only):
243 + Number of nanoseconds spent in the routine that handles PMU
244 + interrupts in the sampling format. Dividing this number by
245 + the number of calls provided by fmt_handler_calls yields the
246 + average time spent in this routine.
248 + ovfl_intr_all_count (read-only):
250 + Number of PMU interrupts received by the kernel.
253 + ovfl_intr_nmi_count (read-only):
255 + Number of Non-Maskable Interrupts (NMI) received by the kernel
256 + for perfmon. This is relevant only on X86 hardware.
258 + ovfl_intr_ns (read-only):
260 + Number of nanoseconds spent in the perfmon2 PMU interrupt
261 + handler routine. Dividing this number by ovfl_intr_all_count
262 + yields the average time to handle one PMU interrupt.
264 + ovfl_intr_regular_count (read-only):
266 + Number of PMU interrupts which are actually processed by
267 + the perfmon interrupt handler. There may be spurious or replay
270 + ovfl_intr_replay_count (read-only):
272 + Number of PMU interrupts which were replayed on context switch
273 + in or on event set switching. Interrupts get replayed when they
274 + were in flight at the time monitoring had to be stopped.
276 + ovfl_intr_spurious_count (read-only):
278 + Number of PMU interrupts which were dropped because there was
279 + no active context (session).
281 + ovfl_notify_count (read-only):
283 + Number of user level notifications sent. Notifications are
284 + appended as messages to the context queue. Notifications may
285 + be sent on PMU interrupts.
287 + pfm_restart_count (read-only):
289 + Number of times pfm_restart() is called.
291 + reset_pmds_count (read-only):
293 + Number of times pfm_reset_pmds() is called.
295 + set_switch_count (read-only):
297 + Number of event set switches.
299 + set_switch_ns (read-only):
301 + Number of nanoseconds spent in the set switching routine.
302 + Dividing this number by set_switch_count yields the average
303 + cost of switching sets.
305 + handle_timeout_count (read-only):
307 + Number of times the pfm_handle_timeout() routine is called.
308 + It is used for timeout-based set switching.
310 + handle_work_count (read-only):
312 + Number of times pfm_handle_work() is called. The routine
313 + handles asynchronous perfmon2 work for per-thread contexts
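+
+As an illustration, the average cost of a PMU context switch-in on one CPU
+can be computed by dividing ctxswin_ns by ctxswin_count, as described above.
+The sketch below assumes the /debugfs mount point used earlier and a per-CPU
+directory named cpu0 (the actual directory names depend on the running
+kernel):
+
+	#include <stdio.h>
+
+	static unsigned long long read_metric(const char *path)
+	{
+		unsigned long long v = 0;
+		FILE *f = fopen(path, "r");
+
+		if (f) {
+			if (fscanf(f, "%llu", &v) != 1)
+				v = 0;
+			fclose(f);
+		}
+		return v;
+	}
+
+	int main(void)
+	{
+		unsigned long long ns, cnt;
+
+		ns  = read_metric("/debugfs/perfmon/cpu0/ctxswin_ns");
+		cnt = read_metric("/debugfs/perfmon/cpu0/ctxswin_count");
+		if (cnt)
+			printf("average switch-in cost: %llu ns\n", ns / cnt);
+		return 0;
+	}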
316 diff --git a/Documentation/perfmon2.txt b/Documentation/perfmon2.txt
318 index 0000000..4a8fada
320 +++ b/Documentation/perfmon2.txt
322 + The perfmon2 hardware monitoring interface
323 + ------------------------------------------
325 + <eranian@gmail.com>
329 + The perfmon2 interface provides access to the hardware performance counters of
330 + major processors. Nowadays, all processors implement some flavors of performance
331 + counters which capture micro-architectural level information such as the number
332 + of elapsed cycles, number of cache misses, and so on.
334 + The interface is implemented as a set of new system calls and a set of config files
337 + It is possible to monitor a single thread or a CPU. In either mode, applications
338 + can count or collect samples. System-wide monitoring is supported by running a
339 + monitoring session on each CPU. The interface supports event-based sampling where the
340 + sampling period is expressed as a number of occurrences of an event, instead of just a
341 + timeout. This approach provides a much better granularity and flexibility.
343 + For performance reasons, it is possible to use a kernel-level sampling buffer to minimize
344 + the overhead incurred by sampling. The format of the buffer, i.e., what is recorded, how
345 + it is recorded, and how it is exported to user-land is controlled by a kernel module called
346 + a custom sampling format. The current implementation comes with a default format but
347 + it is possible to create additional formats. There is an in-kernel registration
348 + interface for formats. Each format is identified by a simple string which a tool
349 + can pass when a monitoring session is created.
351 + The interface also provides support for event sets and multiplexing to work around
352 + hardware limitations in the number of available counters or in how events can be
353 + combined. Each set defines as many counters as the hardware can support. The kernel
354 + then multiplexes the sets. The interface supports time-based switching but also
355 + overflow-based switching, i.e., after n overflows of designated counters.
357 + Applications never manipulate the actual performance counter registers. Instead they see
358 + a logical Performance Monitoring Unit (PMU) composed of a set of config registers (PMC)
359 + and a set of data registers (PMD). Note that PMDs are not necessarily counters, they
360 + can be buffers. The logical PMU is then mapped onto the actual PMU using a mapping
361 + table which is implemented as a kernel module. The mapping is chosen once for each
362 + new processor. It is visible in /sys/kernel/perfmon/pmu_desc. The kernel module
363 + is automatically loaded on first use.
365 + A monitoring session, or context, is uniquely identified by a file descriptor
366 + obtained when the context is created. File sharing semantics apply to access
367 + the context inside a process. A context is never inherited across fork. The file
368 + descriptor can be used to receive counter overflow notifications or when the
369 + sampling buffer is full. It is possible to use poll/select on the descriptor
370 + to wait for notifications from multiple contexts. Similarly, the descriptor
371 + supports asynchronous notification via SIGIO.
373 + Counters are always exported as being 64-bit wide regardless of what the underlying
374 + hardware implements.
376 +II/ Kernel compilation
378 + To enable perfmon2, you need to enable CONFIG_PERFMON
380 +III/ OProfile interactions
382 + The set of features offered by perfmon2 is rich enough to support migrating
383 + Oprofile on top of it. That means that PMU programming and low-level interrupt
384 + handling could be done by perfmon2. The Oprofile sampling buffer management code
385 + in the kernel as well as how samples are exported to users could remain through
386 + the use of a custom sampling buffer format. This is how Oprofile works on Itanium.
388 + The current interactions with Oprofile are:
389 + - on X86: Both subsystems can be compiled into the same kernel. There is enforced
390 + mutual exclusion between the two subsystems. When there is an Oprofile
391 + session, no perfmon2 session can exist and vice-versa. Perfmon2 session
392 + encapsulates both per-thread and system-wide sessions here.
394 + - On IA-64: Oprofile works on top of perfmon2. Oprofile being a system-wide monitoring
395 + tool, the regular per-thread vs. system-wide session restrictions apply.
397 + - on PPC: no integration yet. You need to enable/disable one of the two subsystems
398 + - on MIPS: no integration yet. You need to enable/disable one of the two subsystems
402 + We have released a simple monitoring tool to demonstrate the features of the
403 + interface. The tool is called pfmon and it comes with a simple helper library
404 + called libpfm. The library comes with a set of examples to show how to use the
405 + kernel perfmon2 interface. Visit http://perfmon2.sf.net for details.
407 + There may be other tools available for perfmon2.
411 + The best way to learn how to program perfmon2 is to take a look at the source
412 + code for the examples in libpfm. The source code is available from:
413 + http://perfmon2.sf.net
415 +VI/ System calls overview
417 + The interface is implemented by the following system calls:
419 + * int pfm_create_context(pfarg_ctx_t *ctx, char *fmt, void *arg, size_t arg_size)
421 + This function creates a perfmon2 context. The type of context is per-thread by
422 + default unless PFM_FL_SYSTEM_WIDE is passed in ctx. The sampling format name
423 + is passed in fmt. Arguments to the format are passed in arg which is of size
424 + arg_size. Upon successful return, the file descriptor identifying the context
427 + * int pfm_write_pmds(int fd, pfarg_pmd_t *pmds, int n)
429 + This function is used to program the PMD registers. It is possible to pass
432 + * int pfm_write_pmcs(int fd, pfarg_pmc_t *pmcs, int n)
434 + This function is used to program the PMC registers. It is possible to pass
437 + * int pfm_read_pmds(int fd, pfarg_pmd_t *pmds, int n)
439 + This function is used to read the PMD registers. It is possible to pass
442 + * int pfm_load_context(int fd, pfarg_load_t *load)
444 + This function is used to attach the context to a thread or CPU.
445 + Thread means kernel-visible thread (NPTL). The thread identification
446 + as obtained by gettid must be passed to load->load_target.
448 + To operate on another thread (not self), it is mandatory that the thread
449 + be stopped via ptrace().
451 + To attach to a CPU, the CPU number must be specified in load->load_target
452 + AND the call must be issued on that CPU. To monitor a CPU, a thread MUST
453 + be pinned on that CPU.
455 + Until the context is attached, the actual counters are not accessed.
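+
+ The pinning requirement can be met with sched_setaffinity(). A minimal
+ sketch, assuming a system-wide context has already been created and is
+ identified by fd (pfarg_load_t and its load_target field are used here as
+ described above; the authoritative definitions come from the perfmon2
+ headers):
+
+	#define _GNU_SOURCE
+	#include <sched.h>
+	#include <string.h>
+
+	/* attach the system-wide context 'fd' to CPU 'cpu' */
+	static int attach_to_cpu(int fd, int cpu)
+	{
+		pfarg_load_t load;
+		cpu_set_t mask;
+
+		/* pin the calling thread on the target CPU first */
+		CPU_ZERO(&mask);
+		CPU_SET(cpu, &mask);
+		if (sched_setaffinity(0, sizeof(mask), &mask))
+			return -1;
+
+		memset(&load, 0, sizeof(load));
+		load.load_target = cpu;	/* CPU number, as described above */
+		return pfm_load_context(fd, &load);
+	}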
457 + * int pfm_unload_context(int fd)
459 + The context is detached from the thread or CPU it was attached to.
460 + As a consequence, monitoring is stopped.
462 + When monitoring another thread, the thread MUST be stopped via ptrace()
463 + for this function to succeed.
465 + * int pfm_start(int fd, pfarg_start_t *st)
467 + Start monitoring. The context must be attached for this function to succeed.
468 + Optionally, it is possible to specify the event set on which to start using the
469 + st argument, otherwise just pass NULL.
471 + When monitoring another thread, the thread MUST be stopped via ptrace()
472 + for this function to succeed.
474 + * int pfm_stop(int fd)
476 + Stop monitoring. The context must be attached for this function to succeed.
478 + When monitoring another thread, the thread MUST be stopped via ptrace()
479 + for this function to succeed.
482 + * int pfm_create_evtsets(int fd, pfarg_setdesc_t *sets, int n)
484 + This function is used to create or change event sets. By default set 0 exists.
485 + It is possible to create/change multiple sets in one call.
487 + The context must be detached for this call to succeed.
489 + Sets are identified by a 16-bit integer. They are sorted based on this
490 + identifier, and switching occurs in a round-robin fashion.
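+
+ As an illustration, two extra sets could be created right after context
+ creation, before the context is attached (the set_id field name below is an
+ assumption for illustration; the authoritative pfarg_setdesc_t layout is in
+ the perfmon2 headers):
+
+	pfarg_setdesc_t setd[2];
+
+	memset(setd, 0, sizeof(setd));
+	setd[0].set_id = 1;	/* assumed field: the 16-bit set identifier */
+	setd[1].set_id = 2;
+	if (pfm_create_evtsets(fd, setd, 2) == -1)
+		perror("pfm_create_evtsets");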
492 + * int pfm_delete_evtsets(int fd, pfarg_setdesc_t *sets, int n)
494 + Delete event sets. The context must be detached for this call to succeed.
497 + * int pfm_getinfo_evtsets(int fd, pfarg_setinfo_t *sets, int n)
499 + Retrieve information about event sets. In particular it is possible
500 + to get the number of activations of a set. It is possible to retrieve
501 + information about multiple sets in one call.
504 + * int pfm_restart(int fd)
506 + Indicate to the kernel that the application is done processing an overflow
507 + notification. A consequence of this call could be that monitoring resumes.
509 + * int read(fd, pfm_msg_t *msg, sizeof(pfm_msg_t))
511 + The regular read() system call can be used with the context file descriptor to
512 + receive overflow notification messages. Non-blocking read() is supported.
514 + Each message carries information about the overflow such as which counter overflowed
515 + and where the program was (interrupted instruction pointer).
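+
+ For a blocking descriptor, a simple notification loop could look like the
+ sketch below (pfm_msg_t and the PFM_MSG_* constants come from the perfmon2
+ headers; only the overflow case is handled):
+
+	pfm_msg_t msg;
+
+	for (;;) {
+		if (read(fd, &msg, sizeof(msg)) != sizeof(msg))
+			break;
+		if (msg.pfm_gen_msg.msg_type == PFM_MSG_OVFL) {
+			/* process the overflow, then let monitoring resume */
+			pfm_restart(fd);
+		} else {
+			/* e.g. PFM_MSG_END: the monitored task exited */
+			break;
+		}
+	}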
517 + * int close(int fd)
519 + To destroy a context, the regular close() system call is used.
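+
+ Putting the calls together, the sketch below self-monitors a piece of code
+ with a single counter. It is illustrative only: the PMC/PMD indices and the
+ event encoding are placeholders that depend on the PMU description module,
+ and the pfarg_* structures, field names (reg_num, reg_value, load_target)
+ and prototypes are assumed to come from the perfmon2/libpfm headers. A real
+ tool would use libpfm to translate an event name into the PMC value.
+
+	#define _GNU_SOURCE
+	#include <stdio.h>
+	#include <string.h>
+	#include <unistd.h>
+	#include <sys/syscall.h>
+	#include <perfmon/perfmon.h>	/* assumed location of the user-level definitions */
+
+	int main(void)
+	{
+		pfarg_ctx_t ctx;
+		pfarg_pmc_t pc;
+		pfarg_pmd_t pd;
+		pfarg_load_t load;
+		int fd;
+
+		memset(&ctx, 0, sizeof(ctx));
+		memset(&pc, 0, sizeof(pc));
+		memset(&pd, 0, sizeof(pd));
+		memset(&load, 0, sizeof(load));
+
+		/* per-thread context, no sampling format */
+		fd = pfm_create_context(&ctx, NULL, NULL, 0);
+		if (fd == -1)
+			return 1;
+
+		/* placeholder programming: one PMC/PMD pair, index 4 */
+		pc.reg_num = 4;
+		pc.reg_value = 0;	/* event encoding goes here */
+		pd.reg_num = 4;
+		if (pfm_write_pmcs(fd, &pc, 1) || pfm_write_pmds(fd, &pd, 1))
+			return 1;
+
+		/* attach to the calling thread, then start counting */
+		load.load_target = syscall(SYS_gettid);
+		if (pfm_load_context(fd, &load) || pfm_start(fd, NULL))
+			return 1;
+
+		/* ... workload to measure ... */
+
+		pfm_stop(fd);
+		if (pfm_read_pmds(fd, &pd, 1) == 0)
+			printf("counter: %llu\n", (unsigned long long)pd.reg_value);
+
+		close(fd);
+		return 0;
+	}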
522 +VII/ /sys interface overview
524 + Refer to Documentation/ABI/testing/sysfs-perfmon-* for a detailed description
525 + of the sysfs interface of perfmon2.
527 +VIII/ debugfs interface overview
529 + Refer to Documentation/perfmon2-debugfs.txt for a detailed description of the
530 + debug and statistics interface of perfmon2.
534 + Visit http://perfmon2.sf.net
535 diff --git a/MAINTAINERS b/MAINTAINERS
536 index 8dae455..fb38c2a 100644
539 @@ -3239,6 +3239,14 @@ M: balbir@linux.vnet.ibm.com
540 L: linux-kernel@vger.kernel.org
545 +M: eranian@gmail.com
546 +L: perfmon2-devel@lists.sf.net
547 +W: http://perfmon2.sf.net
548 +T: git kernel.org:/pub/scm/linux/kernel/git/eranian/linux-2.6
554 diff --git a/Makefile b/Makefile
555 index 16e3fbb..7bb1320 100644
558 @@ -620,6 +620,7 @@ export mod_strip_cmd
560 ifeq ($(KBUILD_EXTMOD),)
561 core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
562 +core-$(CONFIG_PERFMON) += perfmon/
564 vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
565 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
566 diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
567 index 48e496f..1d79b01 100644
568 --- a/arch/ia64/Kconfig
569 +++ b/arch/ia64/Kconfig
570 @@ -470,14 +470,6 @@ config COMPAT_FOR_U64_ALIGNMENT
571 config IA64_MCA_RECOVERY
572 tristate "MCA recovery from errors other than TLB."
575 - bool "Performance monitor support"
577 - Selects whether support for the IA-64 performance monitor hardware
578 - is included in the kernel. This makes some kernel data-structures a
579 - little bigger and slows down execution a bit, but it is generally
580 - a good idea to turn this on. If you're unsure, say Y.
583 tristate "/proc/pal support"
585 @@ -549,6 +541,8 @@ source "drivers/firmware/Kconfig"
587 source "fs/Kconfig.binfmt"
589 +source "arch/ia64/perfmon/Kconfig"
593 menu "Power management and ACPI"
594 diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
595 index 905d25b..9aa622d 100644
596 --- a/arch/ia64/Makefile
597 +++ b/arch/ia64/Makefile
598 @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
599 core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
600 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
601 core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
602 +core-$(CONFIG_PERFMON) += arch/ia64/perfmon/
603 core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
604 core-$(CONFIG_KVM) += arch/ia64/kvm/
606 diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
607 index 9f48397..ff9572a 100644
608 --- a/arch/ia64/configs/generic_defconfig
609 +++ b/arch/ia64/configs/generic_defconfig
610 @@ -209,7 +209,6 @@ CONFIG_IA32_SUPPORT=y
612 CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
613 CONFIG_IA64_MCA_RECOVERY=y
615 CONFIG_IA64_PALINFO=y
616 # CONFIG_IA64_MC_ERR_INJECT is not set
618 @@ -234,6 +233,16 @@ CONFIG_BINFMT_ELF=y
622 +# Hardware Performance Monitoring support
625 +CONFIG_IA64_PERFMON_COMPAT=y
626 +CONFIG_IA64_PERFMON_GENERIC=m
627 +CONFIG_IA64_PERFMON_ITANIUM=y
628 +CONFIG_IA64_PERFMON_MCKINLEY=y
629 +CONFIG_IA64_PERFMON_MONTECITO=y
632 # Power management and ACPI
635 diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
636 index ccbe8ae..cf64b3b 100644
637 --- a/arch/ia64/include/asm/Kbuild
638 +++ b/arch/ia64/include/asm/Kbuild
639 @@ -5,10 +5,12 @@ header-y += fpu.h
641 header-y += ia64regs.h
642 header-y += intel_intrin.h
643 -header-y += perfmon_default_smpl.h
644 header-y += ptrace_offsets.h
646 header-y += ucontext.h
647 +header-y += perfmon.h
648 +header-y += perfmon_compat.h
649 +header-y += perfmon_default_smpl.h
651 unifdef-y += gcc_intrin.h
652 unifdef-y += intrinsics.h
653 diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h
654 index 5c99cbc..4a45cb0 100644
655 --- a/arch/ia64/include/asm/hw_irq.h
656 +++ b/arch/ia64/include/asm/hw_irq.h
657 @@ -67,9 +67,9 @@ extern int ia64_last_device_vector;
658 #define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1)
660 #define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */
661 -#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */
662 #define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */
663 #define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */
664 +#define IA64_PERFMON_VECTOR 0xf1 /* performance monitor interrupt vector */
665 #define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */
666 #define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */
667 #define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */
668 diff --git a/arch/ia64/include/asm/perfmon.h b/arch/ia64/include/asm/perfmon.h
669 index 7f3333d..150c4b4 100644
670 --- a/arch/ia64/include/asm/perfmon.h
671 +++ b/arch/ia64/include/asm/perfmon.h
674 - * Copyright (C) 2001-2003 Hewlett-Packard Co
675 - * Stephane Eranian <eranian@hpl.hp.com>
678 -#ifndef _ASM_IA64_PERFMON_H
679 -#define _ASM_IA64_PERFMON_H
682 - * perfmon comamnds supported on all CPU models
684 -#define PFM_WRITE_PMCS 0x01
685 -#define PFM_WRITE_PMDS 0x02
686 -#define PFM_READ_PMDS 0x03
687 -#define PFM_STOP 0x04
688 -#define PFM_START 0x05
689 -#define PFM_ENABLE 0x06 /* obsolete */
690 -#define PFM_DISABLE 0x07 /* obsolete */
691 -#define PFM_CREATE_CONTEXT 0x08
692 -#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */
693 -#define PFM_RESTART 0x0a
694 -#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
695 -#define PFM_GET_FEATURES 0x0c
696 -#define PFM_DEBUG 0x0d
697 -#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
698 -#define PFM_GET_PMC_RESET_VAL 0x0f
699 -#define PFM_LOAD_CONTEXT 0x10
700 -#define PFM_UNLOAD_CONTEXT 0x11
703 - * PMU model specific commands (may not be supported on all PMU models)
705 -#define PFM_WRITE_IBRS 0x20
706 -#define PFM_WRITE_DBRS 0x21
711 -#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */
712 -#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
713 -#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */
718 -#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */
723 -#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */
724 -#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */
727 - * PMD/PMC/IBR/DBR return flags (ignored on input)
728 + * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
729 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
731 - * Those flags are used on output and must be checked in case EAGAIN is returned
732 - * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
734 -#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */
735 -#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */
736 -#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
738 -#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0)
740 -typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */
743 - * Request structure used to define a context
746 - pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */
747 - unsigned long ctx_flags; /* noblock/block */
748 - unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */
749 - unsigned short ctx_reserved1; /* for future use */
750 - int ctx_fd; /* return arg: unique identification for context */
751 - void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */
752 - unsigned long ctx_reserved2[11];/* for future use */
756 - * Request structure used to write/read a PMC or PMD
759 - unsigned int reg_num; /* which register */
760 - unsigned short reg_set; /* event set for this register */
761 - unsigned short reg_reserved1; /* for future use */
763 - unsigned long reg_value; /* initial pmc/pmd value */
764 - unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */
766 - unsigned long reg_long_reset; /* reset after buffer overflow notification */
767 - unsigned long reg_short_reset; /* reset after counter overflow */
769 - unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */
770 - unsigned long reg_random_seed; /* seed value when randomization is used */
771 - unsigned long reg_random_mask; /* bitmask used to limit random value */
772 - unsigned long reg_last_reset_val;/* return: PMD last reset value */
774 - unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */
775 - unsigned long reg_smpl_eventid; /* opaque sampling event identifier */
777 - unsigned long reg_reserved2[3]; /* for future use */
781 - unsigned int dbreg_num; /* which debug register */
782 - unsigned short dbreg_set; /* event set for this register */
783 - unsigned short dbreg_reserved1; /* for future use */
784 - unsigned long dbreg_value; /* value for debug register */
785 - unsigned long dbreg_flags; /* return: dbreg error */
786 - unsigned long dbreg_reserved2[1]; /* for future use */
790 - unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */
791 - unsigned int ft_reserved; /* reserved for future use */
792 - unsigned long reserved[4]; /* for future use */
796 - pid_t load_pid; /* process to load the context into */
797 - unsigned short load_set; /* first event set to load */
798 - unsigned short load_reserved1; /* for future use */
799 - unsigned long load_reserved2[3]; /* for future use */
803 - int msg_type; /* generic message header */
804 - int msg_ctx_fd; /* generic message header */
805 - unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
806 - unsigned short msg_active_set; /* active set at the time of overflow */
807 - unsigned short msg_reserved1; /* for future use */
808 - unsigned int msg_reserved2; /* for future use */
809 - unsigned long msg_tstamp; /* for perf tuning/debug */
813 - int msg_type; /* generic message header */
814 - int msg_ctx_fd; /* generic message header */
815 - unsigned long msg_tstamp; /* for perf tuning */
819 - int msg_type; /* type of the message */
820 - int msg_ctx_fd; /* unique identifier for the context */
821 - unsigned long msg_tstamp; /* for perf tuning */
824 -#define PFM_MSG_OVFL 1 /* an overflow happened */
825 -#define PFM_MSG_END 2 /* task to which context was attached ended */
828 - pfm_ovfl_msg_t pfm_ovfl_msg;
829 - pfm_end_msg_t pfm_end_msg;
830 - pfm_gen_msg_t pfm_gen_msg;
834 - * Define the version numbers for both perfmon as a whole and the sampling buffer format.
835 + * This file contains Itanium Processor Family specific definitions
836 + * for the perfmon interface.
838 + * This program is free software; you can redistribute it and/or
839 + * modify it under the terms of version 2 of the GNU General Public
840 + * License as published by the Free Software Foundation.
842 + * This program is distributed in the hope that it will be useful,
843 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
844 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
845 + * General Public License for more details.
847 + * You should have received a copy of the GNU General Public License
848 + * along with this program; if not, write to the Free Software
849 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
852 -#define PFM_VERSION_MAJ 2U
853 -#define PFM_VERSION_MIN 0U
854 -#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
855 -#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
856 -#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
858 +#ifndef _ASM_IA64_PERFMON_H_
859 +#define _ASM_IA64_PERFMON_H_
862 - * miscellaneous architected definitions
863 + * arch-specific user visible interface definitions
865 -#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */
866 -#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */
867 -#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */
871 -extern long perfmonctl(int fd, int cmd, void *arg, int narg);
874 - void (*handler)(int irq, void *arg, struct pt_regs *regs);
875 -} pfm_intr_handler_desc_t;
877 -extern void pfm_save_regs (struct task_struct *);
878 -extern void pfm_load_regs (struct task_struct *);
880 -extern void pfm_exit_thread(struct task_struct *);
881 -extern int pfm_use_debug_registers(struct task_struct *);
882 -extern int pfm_release_debug_registers(struct task_struct *);
883 -extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
884 -extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
885 -extern void pfm_init_percpu(void);
886 -extern void pfm_handle_work(void);
887 -extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
888 -extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
889 +#define PFM_ARCH_MAX_PMCS (256+64)
890 +#define PFM_ARCH_MAX_PMDS (256+64)
895 - * Reset PMD register flags
897 -#define PFM_PMD_SHORT_RESET 0
898 -#define PFM_PMD_LONG_RESET 1
903 - unsigned int notify_user:1; /* notify user program of overflow */
904 - unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */
905 - unsigned int block_task:1; /* block monitored task on kernel exit */
906 - unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
907 - unsigned int reserved:28; /* for future use */
912 - unsigned char ovfl_pmd; /* index of overflowed PMD */
913 - unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */
914 - unsigned short active_set; /* event set active at the time of the overflow */
915 - pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */
917 - unsigned long pmd_last_reset; /* last reset value of of the PMD */
918 - unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */
919 - unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
920 - unsigned long pmd_value; /* current 64-bit value of the PMD */
921 - unsigned long pmd_eventid; /* eventid associated with PMD */
927 - pfm_uuid_t fmt_uuid;
928 - size_t fmt_arg_size;
929 - unsigned long fmt_flags;
931 - int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
932 - int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
933 - int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
934 - int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
935 - int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
936 - int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
937 - int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
939 - struct list_head fmt_list;
942 -extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
943 -extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
944 +#define PFM_ARCH_PMD_STK_ARG 8
945 +#define PFM_ARCH_PMC_STK_ARG 8
948 - * perfmon interface exported to modules
949 + * Itanium specific context flags
951 + * bits[00-15]: generic flags (see asm/perfmon.h)
952 + * bits[16-31]: arch-specific flags
954 -extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
955 -extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
956 -extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
957 -extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
958 +#define PFM_ITA_FL_INSECURE 0x10000 /* clear psr.sp on non system, non self */
961 - * describe the content of the local_cpu_date->pfm_syst_info field
962 + * Itanium specific public event set flags (set_flags)
964 + * event set flags layout:
965 + * bits[00-15] : generic flags
966 + * bits[16-31] : arch-specific flags
968 -#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */
969 -#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */
970 -#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */
971 +#define PFM_ITA_SETFL_EXCL_INTR 0x10000 /* exclude interrupt execution */
972 +#define PFM_ITA_SETFL_INTR_ONLY 0x20000 /* include only interrupt execution */
973 +#define PFM_ITA_SETFL_IDLE_EXCL 0x40000 /* stop monitoring in idle loop */
976 - * sysctl control structure. visible to sampling formats
977 + * compatibility for version v2.0 of the interface
980 - int debug; /* turn on/off debugging via syslog */
981 - int debug_ovfl; /* turn on/off debug printk in overflow handler */
982 - int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
983 - int expert_mode; /* turn on/off value checking */
985 -extern pfm_sysctl_t pfm_sysctl;
988 -#endif /* __KERNEL__ */
989 +#include <asm/perfmon_compat.h>
991 -#endif /* _ASM_IA64_PERFMON_H */
992 +#endif /* _ASM_IA64_PERFMON_H_ */
993 diff --git a/arch/ia64/include/asm/perfmon_compat.h b/arch/ia64/include/asm/perfmon_compat.h
995 index 0000000..5c14514
997 +++ b/arch/ia64/include/asm/perfmon_compat.h
1000 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
1001 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
1003 + * This header file contains perfmon interface definition
1004 + * that are now obsolete and should be dropped in favor
1005 + * of their equivalent functions as explained below.
1007 + * This program is free software; you can redistribute it and/or
1008 + * modify it under the terms of version 2 of the GNU General Public
1009 + * License as published by the Free Software Foundation.
1011 + * This program is distributed in the hope that it will be useful,
1012 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1013 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1014 + * General Public License for more details.
1016 + * You should have received a copy of the GNU General Public License
1017 + * along with this program; if not, write to the Free Software
1018 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1022 +#ifndef _ASM_IA64_PERFMON_COMPAT_H_
1023 +#define _ASM_IA64_PERFMON_COMPAT_H_
1026 + * custom sampling buffer identifier type
1028 +typedef __u8 pfm_uuid_t[16];
1031 + * obsolete perfmon commands. Supported only on IA-64 for
1032 + * backward compatibility reasons with perfmon v2.0.
1034 +#define PFM_WRITE_PMCS 0x01 /* use pfm_write_pmcs */
1035 +#define PFM_WRITE_PMDS 0x02 /* use pfm_write_pmds */
1036 +#define PFM_READ_PMDS 0x03 /* use pfm_read_pmds */
1037 +#define PFM_STOP 0x04 /* use pfm_stop */
1038 +#define PFM_START 0x05 /* use pfm_start */
1039 +#define PFM_ENABLE 0x06 /* obsolete */
1040 +#define PFM_DISABLE 0x07 /* obsolete */
1041 +#define PFM_CREATE_CONTEXT 0x08 /* use pfm_create_context */
1042 +#define PFM_DESTROY_CONTEXT 0x09 /* use close() */
1043 +#define PFM_RESTART 0x0a /* use pfm_restart */
1044 +#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
1045 +#define PFM_GET_FEATURES 0x0c /* use /proc/sys/perfmon */
1046 +#define PFM_DEBUG 0x0d /* /proc/sys/kernel/perfmon/debug */
1047 +#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
1048 +#define PFM_GET_PMC_RESET_VAL 0x0f /* use /proc/perfmon_map */
1049 +#define PFM_LOAD_CONTEXT 0x10 /* use pfm_load_context */
1050 +#define PFM_UNLOAD_CONTEXT 0x11 /* use pfm_unload_context */
1053 + * PMU model specific commands (may not be supported on all PMU models)
1055 +#define PFM_WRITE_IBRS 0x20 /* obsolete: use PFM_WRITE_PMCS[256-263]*/
1056 +#define PFM_WRITE_DBRS 0x21 /* obsolete: use PFM_WRITE_PMCS[264-271]*/
1059 + * argument to PFM_CREATE_CONTEXT
1061 +struct pfarg_context {
1062 + pfm_uuid_t ctx_smpl_buf_id; /* buffer format to use */
1063 + unsigned long ctx_flags; /* noblock/block */
1064 + unsigned int ctx_reserved1; /* for future use */
1065 + int ctx_fd; /* return: fildesc */
1066 + void *ctx_smpl_vaddr; /* return: vaddr of buffer */
1067 + unsigned long ctx_reserved3[11];/* for future use */
1071 + * argument structure for PFM_WRITE_PMCS/PFM_WRITE_PMDS/PFM_READ_PMDS
1074 + unsigned int reg_num; /* which register */
1075 + unsigned short reg_set; /* event set for this register */
1076 + unsigned short reg_reserved1; /* for future use */
1078 + unsigned long reg_value; /* initial pmc/pmd value */
1079 + unsigned long reg_flags; /* input: flags, ret: error */
1081 + unsigned long reg_long_reset; /* reset value after notification */
1082 + unsigned long reg_short_reset; /* reset after counter overflow */
1084 + unsigned long reg_reset_pmds[4]; /* registers to reset on overflow */
1085 + unsigned long reg_random_seed; /* seed for randomization */
1086 + unsigned long reg_random_mask; /* random range limit */
1087 + unsigned long reg_last_reset_val;/* return: PMD last reset value */
1089 + unsigned long reg_smpl_pmds[4]; /* pmds to be saved on overflow */
1090 + unsigned long reg_smpl_eventid; /* opaque sampling event id */
1091 + unsigned long reg_ovfl_switch_cnt;/* #overflows to switch */
1093 + unsigned long reg_reserved2[2]; /* for future use */
1097 + * argument to PFM_WRITE_IBRS/PFM_WRITE_DBRS
1099 +struct pfarg_dbreg {
1100 + unsigned int dbreg_num; /* which debug register */
1101 + unsigned short dbreg_set; /* event set */
1102 + unsigned short dbreg_reserved1; /* for future use */
1103 + unsigned long dbreg_value; /* value for debug register */
1104 + unsigned long dbreg_flags; /* return: dbreg error */
1105 + unsigned long dbreg_reserved2[1]; /* for future use */
1109 + * argument to PFM_GET_FEATURES
1111 +struct pfarg_features {
1112 + unsigned int ft_version; /* major [16-31], minor [0-15] */
1113 + unsigned int ft_reserved; /* reserved for future use */
1114 + unsigned long reserved[4]; /* for future use */
1118 + int msg_type; /* generic message header */
1119 + int msg_ctx_fd; /* generic message header */
1120 + unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
1121 + unsigned short msg_active_set; /* active set on overflow */
1122 + unsigned short msg_reserved1; /* for future use */
1123 + unsigned int msg_reserved2; /* for future use */
1124 + unsigned long msg_tstamp; /* for perf tuning/debug */
1128 + int msg_type; /* generic message header */
1129 + int msg_ctx_fd; /* generic message header */
1130 + unsigned long msg_tstamp; /* for perf tuning */
1134 + int msg_type; /* type of the message */
1135 + int msg_ctx_fd; /* context file descriptor */
1136 + unsigned long msg_tstamp; /* for perf tuning */
1141 + pfm_ovfl_msg_t pfm_ovfl_msg;
1142 + pfm_end_msg_t pfm_end_msg;
1143 + pfm_gen_msg_t pfm_gen_msg;
1147 + * PMD/PMC return flags in case of error (ignored on input)
1149 + * reg_flags layout:
1150 + * bit 00-15 : generic flags
1151 + * bits[16-23] : arch-specific flags (see asm/perfmon.h)
1152 + * bit 24-31 : error codes
1154 + * Those flags are used on output and must be checked in case EINVAL is
1155 + * returned by a command accepting a vector of values and each has a flag
1156 + * field, such as pfarg_reg or pfarg_dbreg
1158 +#define PFM_REG_RETFL_NOTAVAIL (1<<31) /* not implemented or unaccessible */
1159 +#define PFM_REG_RETFL_EINVAL (1<<30) /* entry is invalid */
1160 +#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|\
1161 + PFM_REG_RETFL_EINVAL)
1163 +#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0)
1165 +#endif /* _ASM_IA64_PERFMON_COMPAT_H_ */
1166 diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h
1167 index 48822c0..8234f32 100644
1168 --- a/arch/ia64/include/asm/perfmon_default_smpl.h
1169 +++ b/arch/ia64/include/asm/perfmon_default_smpl.h
1172 - * Copyright (C) 2002-2003 Hewlett-Packard Co
1173 - * Stephane Eranian <eranian@hpl.hp.com>
1174 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
1175 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
1177 - * This file implements the default sampling buffer format
1178 - * for Linux/ia64 perfmon subsystem.
1179 + * This file implements the old default sampling buffer format
1180 + * for the perfmon2 subsystem. For IA-64 only.
1182 + * It requires the use of the perfmon_compat.h header. It is recommended
1183 + * that applications be ported to the new format instead.
1185 + * This program is free software; you can redistribute it and/or
1186 + * modify it under the terms of version 2 of the GNU General Public
1187 + * License as published by the Free Software Foundation.
1189 + * This program is distributed in the hope that it will be useful,
1190 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1191 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1192 + * General Public License for more details.
1194 + * You should have received a copy of the GNU General Public License
1195 + * along with this program; if not, write to the Free Software
1196 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1199 -#ifndef __PERFMON_DEFAULT_SMPL_H__
1200 -#define __PERFMON_DEFAULT_SMPL_H__ 1
1201 +#ifndef __ASM_IA64_PERFMON_DEFAULT_SMPL_H__
1202 +#define __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ 1
1205 +#error "this file must be used for compatibility reasons only on IA-64"
1208 #define PFM_DEFAULT_SMPL_UUID { \
1209 - 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
1210 + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
1211 + 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
1214 * format specific parameters (passed at context creation)
1217 +struct pfm_default_smpl_arg {
1218 unsigned long buf_size; /* size of the buffer in bytes */
1219 unsigned int flags; /* buffer specific flags */
1220 unsigned int res1; /* for future use */
1221 unsigned long reserved[2]; /* for future use */
1222 -} pfm_default_smpl_arg_t;
1226 * combined context+format specific structure. Can be passed
1227 - * to PFM_CONTEXT_CREATE
1228 + * to PFM_CONTEXT_CREATE (not PFM_CONTEXT_CREATE2)
1231 - pfarg_context_t ctx_arg;
1232 - pfm_default_smpl_arg_t buf_arg;
1233 -} pfm_default_smpl_ctx_arg_t;
1234 +struct pfm_default_smpl_ctx_arg {
1235 + struct pfarg_context ctx_arg;
1236 + struct pfm_default_smpl_arg buf_arg;
1240 * This header is at the beginning of the sampling buffer returned to the user.
1241 * It is directly followed by the first record.
1244 - unsigned long hdr_count; /* how many valid entries */
1245 - unsigned long hdr_cur_offs; /* current offset from top of buffer */
1246 - unsigned long hdr_reserved2; /* reserved for future use */
1247 +struct pfm_default_smpl_hdr {
1248 + u64 hdr_count; /* how many valid entries */
1249 + u64 hdr_cur_offs; /* current offset from top of buffer */
1250 + u64 dr_reserved2; /* reserved for future use */
1252 - unsigned long hdr_overflows; /* how many times the buffer overflowed */
1253 - unsigned long hdr_buf_size; /* how many bytes in the buffer */
1254 + u64 hdr_overflows; /* how many times the buffer overflowed */
1255 + u64 hdr_buf_size; /* how many bytes in the buffer */
1257 - unsigned int hdr_version; /* contains perfmon version (smpl format diffs) */
1258 - unsigned int hdr_reserved1; /* for future use */
1259 - unsigned long hdr_reserved[10]; /* for future use */
1260 -} pfm_default_smpl_hdr_t;
1261 + u32 hdr_version; /* smpl format version*/
1262 + u32 hdr_reserved1; /* for future use */
1263 + u64 hdr_reserved[10]; /* for future use */
1267 * Entry header in the sampling buffer. The header is directly followed
1268 - * with the values of the PMD registers of interest saved in increasing
1269 - * index order: PMD4, PMD5, and so on. How many PMDs are present depends
1270 + * with the values of the PMD registers of interest saved in increasing
1271 + * index order: PMD4, PMD5, and so on. How many PMDs are present depends
1272 * on how the session was programmed.
1274 * In the case where multiple counters overflow at the same time, multiple
1275 * entries are written consecutively.
1277 - * last_reset_value member indicates the initial value of the overflowed PMD.
1278 + * last_reset_value member indicates the initial value of the overflowed PMD.
1281 - int pid; /* thread id (for NPTL, this is gettid()) */
1282 - unsigned char reserved1[3]; /* reserved for future use */
1283 - unsigned char ovfl_pmd; /* index of overflowed PMD */
1285 - unsigned long last_reset_val; /* initial value of overflowed PMD */
1286 - unsigned long ip; /* where did the overflow interrupt happened */
1287 - unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */
1289 - unsigned short cpu; /* cpu on which the overfow occured */
1290 - unsigned short set; /* event set active when overflow ocurred */
1291 - int tgid; /* thread group id (for NPTL, this is getpid()) */
1292 -} pfm_default_smpl_entry_t;
1293 +struct pfm_default_smpl_entry {
1294 + pid_t pid; /* thread id (for NPTL, this is gettid()) */
1295 + uint8_t reserved1[3]; /* for future use */
1296 + uint8_t ovfl_pmd; /* overflow pmd for this sample */
1297 + u64 last_reset_val; /* initial value of overflowed PMD */
1298 + unsigned long ip; /* where the overflow interrupt happened */
1299 + u64 tstamp; /* overflow timestamp */
1300 + u16 cpu; /* cpu on which the overflow occurred */
1301 + u16 set; /* event set active when overflow occurred */
1302 + pid_t tgid; /* thread group id (for NPTL, this is getpid()) */
1305 -#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */
1306 -#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS))
1307 -#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE)
1308 +#define PFM_DEFAULT_MAX_PMDS 64 /* #pmds supported */
1309 +#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(struct pfm_default_smpl_entry)+\
1310 + (sizeof(u64)*PFM_DEFAULT_MAX_PMDS))
1311 +#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(struct pfm_default_smpl_hdr)+\
1312 + PFM_DEFAULT_MAX_ENTRY_SIZE)
1314 #define PFM_DEFAULT_SMPL_VERSION_MAJ 2U
1315 -#define PFM_DEFAULT_SMPL_VERSION_MIN 0U
1316 -#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
1317 +#define PFM_DEFAULT_SMPL_VERSION_MIN 1U
1318 +#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|\
1319 + (PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
1321 -#endif /* __PERFMON_DEFAULT_SMPL_H__ */
1322 +#endif /* __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ */
1323 diff --git a/arch/ia64/include/asm/perfmon_kern.h b/arch/ia64/include/asm/perfmon_kern.h
1324 new file mode 100644
1325 index 0000000..fb40459
1327 +++ b/arch/ia64/include/asm/perfmon_kern.h
1330 + * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
1331 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
1333 + * This file contains Itanium Processor Family specific definitions
1334 + * for the perfmon interface.
1336 + * This program is free software; you can redistribute it and/or
1337 + * modify it under the terms of version 2 of the GNU General Public
1338 + * License as published by the Free Software Foundation.
1340 + * This program is distributed in the hope that it will be useful,
1341 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1342 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1343 + * General Public License for more details.
1345 + * You should have received a copy of the GNU General Public License
1346 + * along with this program; if not, write to the Free Software
1347 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1350 +#ifndef _ASM_IA64_PERFMON_KERN_H_
1351 +#define _ASM_IA64_PERFMON_KERN_H_
1355 +#ifdef CONFIG_PERFMON
1356 +#include <asm/unistd.h>
1357 +#include <asm/hw_irq.h>
1360 + * describe the content of the pfm_syst_info field
1362 + * bits[00-15] : generic flags
1363 + * bits[16-31] : arch-specific flags
1365 +#define PFM_ITA_CPUINFO_IDLE_EXCL 0x10000 /* stop monitoring in idle loop */
1368 + * For some CPUs, the upper bits of a counter must be set in order for the
1369 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
1370 + * and the upper bits are cleared. This function may be used to set them back.
1372 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
1373 + unsigned int cnum)
1377 + * called from __pfm_interrupt_handler(). ctx is not NULL.
1378 + * ctx is locked. PMU interrupt is masked.
1380 + * must stop all monitoring to ensure handler has consistent view.
1381 + * must collect overflowed PMDs bitmask into povfls_pmds and
1382 + * npend_ovfls. If no interrupt detected then npend_ovfls
1383 + * must be set to zero.
1385 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
1386 + struct pfm_event_set *set)
1391 + * do not overwrite existing value, must
1392 + * process those first (coming from context switch replay)
1394 + if (set->npend_ovfls)
1399 + tmp = ia64_get_pmc(0) & ~0xf;
1401 + set->povfl_pmds[0] = tmp;
1403 + set->npend_ovfls = ia64_popcnt(tmp);
1406 +static inline int pfm_arch_init_pmu_config(void)
1411 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
1413 + ia64_resend_irq(IA64_PERFMON_VECTOR);
1416 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
1417 + struct pfm_event_set *set)
1420 +static inline void pfm_arch_serialize(void)
1425 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
1427 + PFM_DBG_ovfl("state=%d", ctx->state);
1428 + ia64_set_pmc(0, 0);
1429 + /* no serialization */
1432 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
1433 + unsigned int cnum, u64 value)
1436 + ia64_set_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
1437 + } else if (cnum < 264) {
1438 + ia64_set_ibr(cnum-256, value);
1439 + ia64_dv_serialize_instruction();
1441 + ia64_set_dbr(cnum-264, value);
1442 + ia64_dv_serialize_instruction();
1447 + * On IA-64, for per-thread context which have the ITA_FL_INSECURE
1448 + * flag, it is possible to start/stop monitoring directly from user level
1449 + * without calling pfm_start()/pfm_stop(). This allows very lightweight
1450 + * control yet the kernel sometimes needs to know if monitoring is actually
1453 + * Tracking of this information is normally done by pfm_start/pfm_stop
1454 + * in flags.started. Here we need to compensate by checking actual
1457 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
1459 + return ctx->flags.started
1460 + || ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_UP|IA64_PSR_PP);
1463 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
1464 + unsigned int cnum, u64 value)
1467 + * for a counting PMD, overflow bit must be cleared
1469 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
1470 + value &= pfm_pmu_conf->ovfl_mask;
1473 + * for counters, writes to the upper bits are ignored, no need to mask
1475 + ia64_set_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
1478 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
1480 + return ia64_get_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr);
1483 +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
1485 + return ia64_get_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr);
1488 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
1489 + struct pfm_context *ctx)
1491 + struct pt_regs *regs;
1493 + regs = task_pt_regs(task);
1494 + ia64_psr(regs)->pp = 0;
1497 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
1498 + struct pfm_context *ctx)
1500 + struct pt_regs *regs;
1502 + if (!(ctx->active_set->flags & PFM_ITA_SETFL_INTR_ONLY)) {
1503 + regs = task_pt_regs(task);
1504 + ia64_psr(regs)->pp = 1;
1509 + * On IA-64, the PMDs are NOT saved by pfm_arch_freeze_pmu()
1510 + * when entering the PMU interrupt handler, thus, we need
1511 + * to save them in pfm_switch_sets_from_intr()
1513 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
1514 + struct pfm_event_set *set)
1516 + pfm_save_pmds(ctx, set);
1519 +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags);
1521 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
1524 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
1525 +void pfm_arch_ctxswin_thread(struct task_struct *task,
1526 + struct pfm_context *ctx);
1528 +void pfm_arch_unload_context(struct pfm_context *ctx);
1529 +int pfm_arch_load_context(struct pfm_context *ctx);
1530 +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags);
1532 +void pfm_arch_mask_monitoring(struct pfm_context *ctx,
1533 + struct pfm_event_set *set);
1534 +void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
1535 + struct pfm_event_set *set);
1537 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
1538 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
1540 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
1541 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
1543 +int pfm_arch_init(void);
1544 +void pfm_arch_init_percpu(void);
1545 +char *pfm_arch_get_pmu_module_name(void);
1547 +int __pfm_use_dbregs(struct task_struct *task);
1548 +int __pfm_release_dbregs(struct task_struct *task);
1549 +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
1550 + struct pfm_event_set *set);
1552 +void pfm_arch_show_session(struct seq_file *m);
1554 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
1559 +static inline void pfm_arch_pmu_release(void)
1562 +/* not necessary on IA-64 */
1563 +static inline void pfm_cacheflush(void *addr, unsigned int len)
1567 + * miscellaneous architected definitions
1569 +#define PFM_ITA_FCNTR 4 /* first counting monitor (PMC/PMD) */
1572 + * private event set flags (set_priv_flags)
1574 +#define PFM_ITA_SETFL_USE_DBR 0x1000000 /* set uses debug registers */
1578 + * Itanium-specific data structures
1580 +struct pfm_ia64_context_flags {
1581 + unsigned int use_dbr:1; /* use range restrictions (debug registers) */
1582 + unsigned int insecure:1; /* insecure monitoring for non-self session */
1583 + unsigned int reserved:30;/* for future use */
1586 +struct pfm_arch_context {
1587 + struct pfm_ia64_context_flags flags; /* arch specific ctx flags */
1588 + u64 ctx_saved_psr_up;/* storage for psr_up */
1589 +#ifdef CONFIG_IA64_PERFMON_COMPAT
1590 + void *ctx_smpl_vaddr; /* vaddr of user mapping */
1594 +#ifdef CONFIG_IA64_PERFMON_COMPAT
1595 +ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
1599 +int pfm_ia64_compat_init(void);
1600 +int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
1601 + size_t rsize, struct file *filp);
1603 +static inline ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
1611 +static inline int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
1612 + size_t rsize, struct file *filp)
1618 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
1621 +	 * On IA-64, we ran out of room in the bottom 7 bits of the
1622 +	 * threadinfo bitmask. Thus we use a 2-stage approach, piggybacking
1623 +	 * on NOTIFY_RESUME; do_notify_resume() then demultiplexes and
1624 +	 * calls pfm_handle_work() if needed.
1626 + set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
1629 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
1632 + * we cannot just clear TIF_NOTIFY_RESUME because other TIF flags are
1633 +	 * piggybacked onto it: TIF_PERFMON_WORK, TIF_RESTORE_RSE
1635 +	 * tsk_clear_notify_resume() checks if any of those are set before
1636 +	 * clearing the bit
1638 + tsk_clear_notify_resume(task);
1641 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
1646 +extern struct pfm_ia64_pmu_info *pfm_ia64_pmu_info;
1648 +#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
1651 + * IA-64 imposes no extra alignment requirement on the sampling buffer
1653 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
1656 +static inline void pfm_release_dbregs(struct task_struct *task)
1658 + if (task->thread.flags & IA64_THREAD_DBG_VALID)
1659 + __pfm_release_dbregs(task);
1662 +#define pfm_use_dbregs(_t) __pfm_use_dbregs(_t)
1664 +static inline int pfm_arch_get_base_syscall(void)
1666 + return __NR_pfm_create_context;
1669 +struct pfm_arch_pmu_info {
1670 +	unsigned long mask_pmcs[PFM_PMC_BV]; /* PMCs to modify when masking */
1673 +DECLARE_PER_CPU(u32, pfm_syst_info);
1674 +#else /* !CONFIG_PERFMON */
1676 + * perfmon ia64-specific hooks
1678 +#define pfm_release_dbregs(_t) do { } while (0)
1679 +#define pfm_use_dbregs(_t) (0)
1681 +#endif /* CONFIG_PERFMON */
1683 +#endif /* __KERNEL__ */
1684 +#endif /* _ASM_IA64_PERFMON_KERN_H_ */
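
A note on the index convention used by pfm_arch_write_pmc() above: the generic PMC index space folds the breakpoint registers in after the real PMCs, so indices 256-263 go to the instruction breakpoint registers (ibr[cnum - 256]) and indices from 264 up go to the data breakpoint registers (dbr[cnum - 264]); the elided first branch presumably tests cnum < 256. A minimal sketch of that mapping, with a hypothetical helper name, purely for illustration:

/* Illustration only, not part of the patch: classify a perfmon register
 * index the way pfm_arch_write_pmc() dispatches it. The cnum < 256 bound
 * for plain PMCs is an assumption inferred from the cnum - 256 IBR offset. */
static const char *pfm_ia64_reg_class(unsigned int cnum)
{
	if (cnum < 256)
		return "PMC";	/* written via pmc_desc[cnum].hw_addr */
	if (cnum < 264)
		return "IBR";	/* instruction breakpoint register ibr[cnum - 256] */
	return "DBR";		/* data breakpoint register dbr[cnum - 264] */
}
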
1685 diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
1686 index f88fa05..9d6af9c 100644
1687 --- a/arch/ia64/include/asm/processor.h
1688 +++ b/arch/ia64/include/asm/processor.h
1691 #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */
1692 #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */
1693 -#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
1694 #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */
1695 #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */
1696 #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration
1697 @@ -321,14 +320,6 @@ struct thread_struct {
1699 # define INIT_THREAD_IA32
1700 #endif /* CONFIG_IA32_SUPPORT */
1701 -#ifdef CONFIG_PERFMON
1702 - void *pfm_context; /* pointer to detailed PMU context */
1703 - unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */
1704 -# define INIT_THREAD_PM .pfm_context = NULL, \
1705 - .pfm_needs_checking = 0UL,
1707 -# define INIT_THREAD_PM
1709 __u64 dbr[IA64_NUM_DBG_REGS];
1710 __u64 ibr[IA64_NUM_DBG_REGS];
1711 struct ia64_fpreg fph[96]; /* saved/loaded on demand */
1712 @@ -343,7 +334,6 @@ struct thread_struct {
1713 .task_size = DEFAULT_TASK_SIZE, \
1714 .last_fph_cpu = -1, \
1719 .fph = {{{{0}}}, } \
1720 diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h
1721 index 927a381..ab5aeea 100644
1722 --- a/arch/ia64/include/asm/system.h
1723 +++ b/arch/ia64/include/asm/system.h
1724 @@ -217,6 +217,7 @@ struct task_struct;
1725 extern void ia64_save_extra (struct task_struct *task);
1726 extern void ia64_load_extra (struct task_struct *task);
1729 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
1730 extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
1731 # define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
1732 @@ -224,16 +225,9 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
1733 # define IA64_ACCOUNT_ON_SWITCH(p,n)
1736 -#ifdef CONFIG_PERFMON
1737 - DECLARE_PER_CPU(unsigned long, pfm_syst_info);
1738 -# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
1740 -# define PERFMON_IS_SYSWIDE() (0)
1743 -#define IA64_HAS_EXTRA_STATE(t) \
1744 - ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \
1745 - || IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE())
1746 +#define IA64_HAS_EXTRA_STATE(t) \
1747 + (((t)->thread.flags & IA64_THREAD_DBG_VALID) \
1748 + || IS_IA32_PROCESS(task_pt_regs(t)))
1750 #define __switch_to(prev,next,last) do { \
1751 IA64_ACCOUNT_ON_SWITCH(prev, next); \
1752 @@ -241,6 +235,10 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
1753 ia64_save_extra(prev); \
1754 if (IA64_HAS_EXTRA_STATE(next)) \
1755 ia64_load_extra(next); \
1756 + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
1757 + pfm_ctxsw_out(prev, next); \
1758 + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
1759 + pfm_ctxsw_in(prev, next); \
1760 ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \
1761 (last) = ia64_switch_to((next)); \
1763 diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
1764 index 7c60fcd..3355332 100644
1765 --- a/arch/ia64/include/asm/thread_info.h
1766 +++ b/arch/ia64/include/asm/thread_info.h
1767 @@ -110,6 +110,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk);
1768 #define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */
1769 #define TIF_FREEZE 20 /* is freezing for suspend */
1770 #define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */
1771 +#define TIF_PERFMON_CTXSW 22 /* perfmon needs ctxsw calls */
1772 +#define TIF_PERFMON_WORK 23 /* work for pfm_handle_work() */
1774 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
1775 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
1776 @@ -123,6 +125,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk);
1777 #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED)
1778 #define _TIF_FREEZE (1 << TIF_FREEZE)
1779 #define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE)
1780 +#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW)
1781 +#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK)
1783 /* "work to do on user-return" bits */
1784 #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
1785 diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
1786 index d535833..29a43bc 100644
1787 --- a/arch/ia64/include/asm/unistd.h
1788 +++ b/arch/ia64/include/asm/unistd.h
1789 @@ -308,11 +308,23 @@
1790 #define __NR_dup3 1316
1791 #define __NR_pipe2 1317
1792 #define __NR_inotify_init1 1318
1793 +#define __NR_pfm_create_context 1319
1794 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
1795 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
1796 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
1797 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
1798 +#define __NR_pfm_start (__NR_pfm_create_context+5)
1799 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
1800 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
1801 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
1802 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
1803 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
1804 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
1809 -#define NR_syscalls 295 /* length of syscall table */
1810 +#define NR_syscalls 307 /* length of syscall table */
1813 * The following defines stop scripts/checksyscalls.sh from complaining about
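
For reference on the numbering: the twelve new entries span syscalls 1319-1330, which matches the NR_syscalls bump from 295 to 307 (ia64 syscall numbers start at 1024, so 1330 - 1024 + 1 = 307). Below is a minimal user-space sketch that only probes whether the new slot is wired up; the real argument layout of pfm_create_context is defined by the perfmon2 ABI elsewhere in this series, so the NULL/0 arguments here are placeholders expected to fail cleanly:

/* Hypothetical probe, not part of the patch: checks that the new syscall
 * slot responds at all. ENOSYS would mean the slot is not wired up;
 * any other error comes from perfmon2 rejecting the placeholder args. */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_pfm_create_context
#define __NR_pfm_create_context	1319	/* ia64 value added above */
#endif

int main(void)
{
	long ret = syscall(__NR_pfm_create_context, NULL, NULL, NULL, 0);
	if (ret < 0)
		perror("pfm_create_context");
	return 0;
}
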
1814 diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
1815 index 87fea11..b5ac54c 100644
1816 --- a/arch/ia64/kernel/Makefile
1817 +++ b/arch/ia64/kernel/Makefile
1819 extra-y := head.o init_task.o vmlinux.lds
1821 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
1822 - irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
1823 + irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o ptrace.o sal.o \
1824 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
1825 unwind.o mca.o mca_asm.o topology.o
1827 @@ -23,7 +23,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o
1828 obj-$(CONFIG_MODULES) += module.o
1829 obj-$(CONFIG_SMP) += smp.o smpboot.o
1830 obj-$(CONFIG_NUMA) += numa.o
1831 -obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
1832 obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
1833 obj-$(CONFIG_CPU_FREQ) += cpufreq/
1834 obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
1835 diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
1836 index 0dd6c14..f1c3e41 100644
1837 --- a/arch/ia64/kernel/entry.S
1838 +++ b/arch/ia64/kernel/entry.S
1839 @@ -1697,6 +1697,18 @@ sys_call_table:
1842 data8 sys_inotify_init1
1843 + data8 sys_pfm_create_context
1844 + data8 sys_pfm_write_pmcs // 1320
1845 + data8 sys_pfm_write_pmds
1846 + data8 sys_pfm_read_pmds
1847 + data8 sys_pfm_load_context
1848 + data8 sys_pfm_start
1849 + data8 sys_pfm_stop // 1325
1850 + data8 sys_pfm_restart
1851 + data8 sys_pfm_create_evtsets
1852 + data8 sys_pfm_getinfo_evtsets
1853 + data8 sys_pfm_delete_evtsets
1854 + data8 sys_pfm_unload_context // 1330
1856 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
1857 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
1858 diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
1859 index 28d3d48..ede8024 100644
1860 --- a/arch/ia64/kernel/irq_ia64.c
1861 +++ b/arch/ia64/kernel/irq_ia64.c
1863 #include <asm/system.h>
1864 #include <asm/tlbflush.h>
1866 -#ifdef CONFIG_PERFMON
1867 -# include <asm/perfmon.h>
1872 #define IRQ_VECTOR_UNASSIGNED (0)
1873 @@ -660,9 +656,6 @@ init_IRQ (void)
1877 -#ifdef CONFIG_PERFMON
1878 - pfm_init_percpu();
1880 platform_irq_init();
1883 diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
1884 deleted file mode 100644
1885 index 5f637bb..0000000
1886 --- a/arch/ia64/kernel/perfmon_default_smpl.c
1890 - * Copyright (C) 2002-2003 Hewlett-Packard Co
1891 - * Stephane Eranian <eranian@hpl.hp.com>
1893 - * This file implements the default sampling buffer format
1894 - * for the Linux/ia64 perfmon-2 subsystem.
1896 -#include <linux/kernel.h>
1897 -#include <linux/types.h>
1898 -#include <linux/module.h>
1899 -#include <linux/init.h>
1900 -#include <asm/delay.h>
1901 -#include <linux/smp.h>
1903 -#include <asm/perfmon.h>
1904 -#include <asm/perfmon_default_smpl.h>
1906 -MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
1907 -MODULE_DESCRIPTION("perfmon default sampling format");
1908 -MODULE_LICENSE("GPL");
1910 -#define DEFAULT_DEBUG 1
1912 -#ifdef DEFAULT_DEBUG
1913 -#define DPRINT(a) \
1915 - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
1918 -#define DPRINT_ovfl(a) \
1920 - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
1925 -#define DPRINT_ovfl(a)
1929 -default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
1931 - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
1934 - if (data == NULL) {
1935 - DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
1939 - DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
1942 - * must hold at least the buffer header + one minimally sized entry
1944 - if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
1946 - DPRINT(("buf_size=%lu\n", arg->buf_size));
1952 -default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
1954 - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
1957 - * size has been validated in default_validate
1959 - *size = arg->buf_size;
1965 -default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
1967 - pfm_default_smpl_hdr_t *hdr;
1968 - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
1970 - hdr = (pfm_default_smpl_hdr_t *)buf;
1972 - hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
1973 - hdr->hdr_buf_size = arg->buf_size;
1974 - hdr->hdr_cur_offs = sizeof(*hdr);
1975 - hdr->hdr_overflows = 0UL;
1976 - hdr->hdr_count = 0UL;
1978 - DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
1979 - task_pid_nr(task),
1981 - hdr->hdr_buf_size,
1984 - hdr->hdr_cur_offs));
1990 -default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
1992 - pfm_default_smpl_hdr_t *hdr;
1993 - pfm_default_smpl_entry_t *ent;
1995 - unsigned long *e, entry_size;
1996 - unsigned int npmds, i;
1997 - unsigned char ovfl_pmd;
1998 - unsigned char ovfl_notify;
2000 - if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
2001 - DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
2005 - hdr = (pfm_default_smpl_hdr_t *)buf;
2006 - cur = buf+hdr->hdr_cur_offs;
2007 - last = buf+hdr->hdr_buf_size;
2008 - ovfl_pmd = arg->ovfl_pmd;
2009 - ovfl_notify = arg->ovfl_notify;
2012 - * precheck for sanity
2014 - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
2016 - npmds = hweight64(arg->smpl_pmds[0]);
2018 - ent = (pfm_default_smpl_entry_t *)cur;
2020 - prefetch(arg->smpl_pmds_values);
2022 - entry_size = sizeof(*ent) + (npmds << 3);
2024 - /* position for first pmd */
2025 - e = (unsigned long *)(ent+1);
2029 - DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
2035 - ovfl_notify, npmds));
2038 - * current = task running at the time of the overflow.
2041 - * - this is ususally the task being monitored.
2042 - * Under certain conditions, it might be a different task
2045 - * - this is not necessarily the task controlling the session
2047 - ent->pid = current->pid;
2048 - ent->ovfl_pmd = ovfl_pmd;
2049 - ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
2052 - * where did the fault happen (includes slot number)
2054 - ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
2056 - ent->tstamp = stamp;
2057 - ent->cpu = smp_processor_id();
2058 - ent->set = arg->active_set;
2059 - ent->tgid = current->tgid;
2062 - * selectively store PMDs in increasing index number
2065 - unsigned long *val = arg->smpl_pmds_values;
2066 - for(i=0; i < npmds; i++) {
2072 - * update position for next entry
2074 - hdr->hdr_cur_offs += entry_size;
2075 - cur += entry_size;
2078 - * post check to avoid losing the last sample
2080 - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
2083 - * keep same ovfl_pmds, ovfl_notify
2085 - arg->ovfl_ctrl.bits.notify_user = 0;
2086 - arg->ovfl_ctrl.bits.block_task = 0;
2087 - arg->ovfl_ctrl.bits.mask_monitoring = 0;
2088 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
2092 - DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
2095 - * increment number of buffer overflow.
2096 - * important to detect duplicate set of samples.
2098 - hdr->hdr_overflows++;
2101 - * if no notification requested, then we saturate the buffer
2103 - if (ovfl_notify == 0) {
2104 - arg->ovfl_ctrl.bits.notify_user = 0;
2105 - arg->ovfl_ctrl.bits.block_task = 0;
2106 - arg->ovfl_ctrl.bits.mask_monitoring = 1;
2107 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
2109 - arg->ovfl_ctrl.bits.notify_user = 1;
2110 - arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
2111 - arg->ovfl_ctrl.bits.mask_monitoring = 1;
2112 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
2114 - return -1; /* we are full, sorry */
2118 -default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
2120 - pfm_default_smpl_hdr_t *hdr;
2122 - hdr = (pfm_default_smpl_hdr_t *)buf;
2124 - hdr->hdr_count = 0UL;
2125 - hdr->hdr_cur_offs = sizeof(*hdr);
2127 - ctrl->bits.mask_monitoring = 0;
2128 - ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
2134 -default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
2136 - DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
2140 -static pfm_buffer_fmt_t default_fmt={
2141 - .fmt_name = "default_format",
2142 - .fmt_uuid = PFM_DEFAULT_SMPL_UUID,
2143 - .fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
2144 - .fmt_validate = default_validate,
2145 - .fmt_getsize = default_get_size,
2146 - .fmt_init = default_init,
2147 - .fmt_handler = default_handler,
2148 - .fmt_restart = default_restart,
2149 - .fmt_restart_active = default_restart,
2150 - .fmt_exit = default_exit,
2154 -pfm_default_smpl_init_module(void)
2158 - ret = pfm_register_buffer_fmt(&default_fmt);
2160 - printk("perfmon_default_smpl: %s v%u.%u registered\n",
2161 - default_fmt.fmt_name,
2162 - PFM_DEFAULT_SMPL_VERSION_MAJ,
2163 - PFM_DEFAULT_SMPL_VERSION_MIN);
2165 - printk("perfmon_default_smpl: %s cannot register ret=%d\n",
2166 - default_fmt.fmt_name,
2174 -pfm_default_smpl_cleanup_module(void)
2177 - ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
2179 - printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
2182 -module_init(pfm_default_smpl_init_module);
2183 -module_exit(pfm_default_smpl_cleanup_module);
2185 diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h
2186 deleted file mode 100644
2187 index 6748947..0000000
2188 --- a/arch/ia64/kernel/perfmon_generic.h
2192 - * This file contains the generic PMU register description tables
2193 - * and pmc checker used by perfmon.c.
2195 - * Copyright (C) 2002-2003 Hewlett Packard Co
2196 - * Stephane Eranian <eranian@hpl.hp.com>
2199 -static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
2200 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2201 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2202 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2203 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2204 -/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2205 -/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2206 -/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2207 -/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2208 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2211 -static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
2212 -/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2213 -/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2214 -/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2215 -/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2216 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
2217 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
2218 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
2219 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
2220 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2224 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2226 -static pmu_config_t pmu_conf_gen={
2227 - .pmu_name = "Generic",
2228 - .pmu_family = 0xff, /* any */
2229 - .ovfl_val = (1UL << 32) - 1,
2230 - .num_ibrs = 0, /* does not use */
2231 - .num_dbrs = 0, /* does not use */
2232 - .pmd_desc = pfm_gen_pmd_desc,
2233 - .pmc_desc = pfm_gen_pmc_desc
2236 diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h
2237 deleted file mode 100644
2238 index d1d508a..0000000
2239 --- a/arch/ia64/kernel/perfmon_itanium.h
2243 - * This file contains the Itanium PMU register description tables
2244 - * and pmc checker used by perfmon.c.
2246 - * Copyright (C) 2002-2003 Hewlett Packard Co
2247 - * Stephane Eranian <eranian@hpl.hp.com>
2249 -static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
2251 -static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
2252 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2253 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2254 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2255 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2256 -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2257 -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2258 -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2259 -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2260 -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2261 -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2262 -/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2263 -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2264 -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2265 -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2266 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2269 -static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
2270 -/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2271 -/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2272 -/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2273 -/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2274 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
2275 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
2276 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
2277 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
2278 -/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2279 -/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2280 -/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2281 -/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2282 -/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2283 -/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2284 -/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2285 -/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2286 -/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2287 -/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2288 - { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2292 -pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2297 - /* sanitfy check */
2298 - if (ctx == NULL) return -EINVAL;
2300 - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
2303 - * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
2304 - * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
2306 - if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
2308 - DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
2310 - /* don't mix debug with perfmon */
2311 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2314 - * a count of 0 will mark the debug registers as in use and also
2315 - * ensure that they are properly cleared.
2317 - ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
2318 - if (ret) return ret;
2322 - * we must clear the (data) debug registers if pmc11.pt bit is cleared
2323 - * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
2325 - if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
2327 - DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
2329 - /* don't mix debug with perfmon */
2330 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2333 - * a count of 0 will mark the debug registers as in use and also
2334 - * ensure that they are properly cleared.
2336 - ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
2337 - if (ret) return ret;
2343 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2345 -static pmu_config_t pmu_conf_ita={
2346 - .pmu_name = "Itanium",
2347 - .pmu_family = 0x7,
2348 - .ovfl_val = (1UL << 32) - 1,
2349 - .pmd_desc = pfm_ita_pmd_desc,
2350 - .pmc_desc = pfm_ita_pmc_desc,
2353 - .use_rr_dbregs = 1, /* debug register are use for range retrictions */
2357 diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
2358 deleted file mode 100644
2359 index c4bec7a..0000000
2360 --- a/arch/ia64/kernel/perfmon_mckinley.h
2364 - * This file contains the McKinley PMU register description tables
2365 - * and pmc checker used by perfmon.c.
2367 - * Copyright (C) 2002-2003 Hewlett Packard Co
2368 - * Stephane Eranian <eranian@hpl.hp.com>
2370 -static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
2372 -static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
2373 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2374 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2375 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2376 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2377 -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2378 -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2379 -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2380 -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2381 -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2382 -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2383 -/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2384 -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2385 -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2386 -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2387 -/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2388 -/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2389 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2392 -static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
2393 -/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2394 -/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2395 -/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2396 -/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2397 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
2398 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
2399 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
2400 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
2401 -/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2402 -/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2403 -/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2404 -/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2405 -/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2406 -/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2407 -/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2408 -/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2409 -/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2410 -/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2411 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2415 - * PMC reserved fields must have their power-up values preserved
2418 -pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2420 - unsigned long tmp1, tmp2, ival = *val;
2422 - /* remove reserved areas from user value */
2423 - tmp1 = ival & PMC_RSVD_MASK(cnum);
2425 - /* get reserved fields values */
2426 - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
2428 - *val = tmp1 | tmp2;
2430 - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
2431 - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
2436 - * task can be NULL if the context is unloaded
2439 -pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2441 - int ret = 0, check_case1 = 0;
2442 - unsigned long val8 = 0, val14 = 0, val13 = 0;
2445 - /* first preserve the reserved fields */
2446 - pfm_mck_reserved(cnum, val, regs);
2448 - /* sanitfy check */
2449 - if (ctx == NULL) return -EINVAL;
2451 - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
2454 - * we must clear the debug registers if pmc13 has a value which enable
2455 - * memory pipeline event constraints. In this case we need to clear the
2456 - * the debug registers if they have not yet been accessed. This is required
2457 - * to avoid picking stale state.
2458 - * PMC13 is "active" if:
2459 - * one of the pmc13.cfg_dbrpXX field is different from 0x3
2461 - * at the corresponding pmc13.ena_dbrpXX is set.
2463 - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
2465 - if (cnum == 13 && is_loaded
2466 - && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
2468 - DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
2470 - /* don't mix debug with perfmon */
2471 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2474 - * a count of 0 will mark the debug registers as in use and also
2475 - * ensure that they are properly cleared.
2477 - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
2478 - if (ret) return ret;
2481 - * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
2482 - * before they are (fl_using_dbreg==0) to avoid picking up stale information.
2484 - if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
2486 - DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
2488 - /* don't mix debug with perfmon */
2489 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2492 - * a count of 0 will mark the debug registers as in use and also
2493 - * ensure that they are properly cleared.
2495 - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
2496 - if (ret) return ret;
2501 - case 4: *val |= 1UL << 23; /* force power enable bit */
2503 - case 8: val8 = *val;
2504 - val13 = ctx->ctx_pmcs[13];
2505 - val14 = ctx->ctx_pmcs[14];
2508 - case 13: val8 = ctx->ctx_pmcs[8];
2510 - val14 = ctx->ctx_pmcs[14];
2513 - case 14: val8 = ctx->ctx_pmcs[8];
2514 - val13 = ctx->ctx_pmcs[13];
2519 - /* check illegal configuration which can produce inconsistencies in tagging
2520 - * i-side events in L1D and L2 caches
2522 - if (check_case1) {
2523 - ret = ((val13 >> 45) & 0xf) == 0
2524 - && ((val8 & 0x1) == 0)
2525 - && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
2526 - ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
2528 - if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
2531 - return ret ? -EINVAL : 0;
2535 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2537 -static pmu_config_t pmu_conf_mck={
2538 - .pmu_name = "Itanium 2",
2539 - .pmu_family = 0x1f,
2540 - .flags = PFM_PMU_IRQ_RESEND,
2541 - .ovfl_val = (1UL << 47) - 1,
2542 - .pmd_desc = pfm_mck_pmd_desc,
2543 - .pmc_desc = pfm_mck_pmc_desc,
2546 - .use_rr_dbregs = 1 /* debug register are use for range restrictions */
2550 diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h
2551 deleted file mode 100644
2552 index 7f8da4c..0000000
2553 --- a/arch/ia64/kernel/perfmon_montecito.h
2557 - * This file contains the Montecito PMU register description tables
2558 - * and pmc checker used by perfmon.c.
2560 - * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
2561 - * Contributed by Stephane Eranian <eranian@hpl.hp.com>
2563 -static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
2565 -#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
2566 - RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
2567 -#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36))
2568 -#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35))
2570 -static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
2571 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2572 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2573 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2574 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2575 -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
2576 -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
2577 -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
2578 -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
2579 -/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
2580 -/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
2581 -/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
2582 -/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
2583 -/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
2584 -/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
2585 -/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
2586 -/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
2587 -/* pmc16 */ { PFM_REG_NOTIMPL, },
2588 -/* pmc17 */ { PFM_REG_NOTIMPL, },
2589 -/* pmc18 */ { PFM_REG_NOTIMPL, },
2590 -/* pmc19 */ { PFM_REG_NOTIMPL, },
2591 -/* pmc20 */ { PFM_REG_NOTIMPL, },
2592 -/* pmc21 */ { PFM_REG_NOTIMPL, },
2593 -/* pmc22 */ { PFM_REG_NOTIMPL, },
2594 -/* pmc23 */ { PFM_REG_NOTIMPL, },
2595 -/* pmc24 */ { PFM_REG_NOTIMPL, },
2596 -/* pmc25 */ { PFM_REG_NOTIMPL, },
2597 -/* pmc26 */ { PFM_REG_NOTIMPL, },
2598 -/* pmc27 */ { PFM_REG_NOTIMPL, },
2599 -/* pmc28 */ { PFM_REG_NOTIMPL, },
2600 -/* pmc29 */ { PFM_REG_NOTIMPL, },
2601 -/* pmc30 */ { PFM_REG_NOTIMPL, },
2602 -/* pmc31 */ { PFM_REG_NOTIMPL, },
2603 -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2604 -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2605 -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2606 -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2607 -/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2608 -/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
2609 -/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2610 -/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
2611 -/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
2612 -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2613 -/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
2614 - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
2617 -static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
2618 -/* pmd0 */ { PFM_REG_NOTIMPL, },
2619 -/* pmd1 */ { PFM_REG_NOTIMPL, },
2620 -/* pmd2 */ { PFM_REG_NOTIMPL, },
2621 -/* pmd3 */ { PFM_REG_NOTIMPL, },
2622 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
2623 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
2624 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
2625 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
2626 -/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}},
2627 -/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
2628 -/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
2629 -/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
2630 -/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
2631 -/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
2632 -/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
2633 -/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
2634 -/* pmd16 */ { PFM_REG_NOTIMPL, },
2635 -/* pmd17 */ { PFM_REG_NOTIMPL, },
2636 -/* pmd18 */ { PFM_REG_NOTIMPL, },
2637 -/* pmd19 */ { PFM_REG_NOTIMPL, },
2638 -/* pmd20 */ { PFM_REG_NOTIMPL, },
2639 -/* pmd21 */ { PFM_REG_NOTIMPL, },
2640 -/* pmd22 */ { PFM_REG_NOTIMPL, },
2641 -/* pmd23 */ { PFM_REG_NOTIMPL, },
2642 -/* pmd24 */ { PFM_REG_NOTIMPL, },
2643 -/* pmd25 */ { PFM_REG_NOTIMPL, },
2644 -/* pmd26 */ { PFM_REG_NOTIMPL, },
2645 -/* pmd27 */ { PFM_REG_NOTIMPL, },
2646 -/* pmd28 */ { PFM_REG_NOTIMPL, },
2647 -/* pmd29 */ { PFM_REG_NOTIMPL, },
2648 -/* pmd30 */ { PFM_REG_NOTIMPL, },
2649 -/* pmd31 */ { PFM_REG_NOTIMPL, },
2650 -/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
2651 -/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
2652 -/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
2653 -/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
2654 -/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
2655 -/* pmd37 */ { PFM_REG_NOTIMPL, },
2656 -/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2657 -/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2658 -/* pmd40 */ { PFM_REG_NOTIMPL, },
2659 -/* pmd41 */ { PFM_REG_NOTIMPL, },
2660 -/* pmd42 */ { PFM_REG_NOTIMPL, },
2661 -/* pmd43 */ { PFM_REG_NOTIMPL, },
2662 -/* pmd44 */ { PFM_REG_NOTIMPL, },
2663 -/* pmd45 */ { PFM_REG_NOTIMPL, },
2664 -/* pmd46 */ { PFM_REG_NOTIMPL, },
2665 -/* pmd47 */ { PFM_REG_NOTIMPL, },
2666 -/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2667 -/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2668 -/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2669 -/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2670 -/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2671 -/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2672 -/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2673 -/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2674 -/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2675 -/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2676 -/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2677 -/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2678 -/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2679 -/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2680 -/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2681 -/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2682 - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
2686 - * PMC reserved fields must have their power-up values preserved
2689 -pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2691 - unsigned long tmp1, tmp2, ival = *val;
2693 - /* remove reserved areas from user value */
2694 - tmp1 = ival & PMC_RSVD_MASK(cnum);
2696 - /* get reserved fields values */
2697 - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
2699 - *val = tmp1 | tmp2;
2701 - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
2702 - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
2707 - * task can be NULL if the context is unloaded
2710 -pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2713 - unsigned long val32 = 0, val38 = 0, val41 = 0;
2714 - unsigned long tmpval;
2715 - int check_case1 = 0;
2718 - /* first preserve the reserved fields */
2719 - pfm_mont_reserved(cnum, val, regs);
2723 - /* sanity check */
2724 - if (ctx == NULL) return -EINVAL;
2726 - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
2729 - * we must clear the debug registers if pmc41 has a value which enable
2730 - * memory pipeline event constraints. In this case we need to clear the
2731 - * the debug registers if they have not yet been accessed. This is required
2732 - * to avoid picking stale state.
2733 - * PMC41 is "active" if:
2734 - * one of the pmc41.cfg_dtagXX field is different from 0x3
2736 - * at the corresponding pmc41.en_dbrpXX is set.
2738 - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
2740 - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
2742 - if (cnum == 41 && is_loaded
2743 - && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
2745 - DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
2747 - /* don't mix debug with perfmon */
2748 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2751 - * a count of 0 will mark the debug registers if:
2754 - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
2755 - if (ret) return ret;
2758 - * we must clear the (instruction) debug registers if:
2759 - * pmc38.ig_ibrpX is 0 (enabled)
2761 - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
2763 - if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
2765 - DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
2767 - /* don't mix debug with perfmon */
2768 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2771 - * a count of 0 will mark the debug registers as in use and also
2772 - * ensure that they are properly cleared.
2774 - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
2775 - if (ret) return ret;
2779 - case 32: val32 = *val;
2780 - val38 = ctx->ctx_pmcs[38];
2781 - val41 = ctx->ctx_pmcs[41];
2784 - case 38: val38 = *val;
2785 - val32 = ctx->ctx_pmcs[32];
2786 - val41 = ctx->ctx_pmcs[41];
2789 - case 41: val41 = *val;
2790 - val32 = ctx->ctx_pmcs[32];
2791 - val38 = ctx->ctx_pmcs[38];
2795 - /* check illegal configuration which can produce inconsistencies in tagging
2796 - * i-side events in L1D and L2 caches
2798 - if (check_case1) {
2799 - ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
2800 - && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
2801 - || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
2803 - DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
2812 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2814 -static pmu_config_t pmu_conf_mont={
2815 - .pmu_name = "Montecito",
2816 - .pmu_family = 0x20,
2817 - .flags = PFM_PMU_IRQ_RESEND,
2818 - .ovfl_val = (1UL << 47) - 1,
2819 - .pmd_desc = pfm_mont_pmd_desc,
2820 - .pmc_desc = pfm_mont_pmc_desc,
2823 - .use_rr_dbregs = 1 /* debug register are use for range retrictions */
2825 diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
2826 index 3ab8373..a7dfb39 100644
2827 --- a/arch/ia64/kernel/process.c
2828 +++ b/arch/ia64/kernel/process.c
2830 #include <linux/delay.h>
2831 #include <linux/kdebug.h>
2832 #include <linux/utsname.h>
2833 +#include <linux/perfmon_kern.h>
2835 #include <asm/cpu.h>
2836 #include <asm/delay.h>
2841 -#ifdef CONFIG_PERFMON
2842 -# include <asm/perfmon.h>
2845 #include "sigframe.h"
2847 void (*ia64_mark_idle)(int);
2848 @@ -162,10 +159,8 @@ show_regs (struct pt_regs *regs)
2850 void tsk_clear_notify_resume(struct task_struct *tsk)
2852 -#ifdef CONFIG_PERFMON
2853 - if (tsk->thread.pfm_needs_checking)
2854 + if (test_ti_thread_flag(task_thread_info(tsk), TIF_PERFMON_WORK))
2857 if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
2859 clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
2860 @@ -188,14 +183,9 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
2864 -#ifdef CONFIG_PERFMON
2865 - if (current->thread.pfm_needs_checking)
2867 - * Note: pfm_handle_work() allow us to call it with interrupts
2868 - * disabled, and may enable interrupts within the function.
2870 - pfm_handle_work();
2872 + /* process perfmon asynchronous work (e.g. block thread or reset) */
2873 + if (test_thread_flag(TIF_PERFMON_WORK))
2874 + pfm_handle_work(task_pt_regs(current));
2876 /* deal with pending signal delivery */
2877 if (test_thread_flag(TIF_SIGPENDING)) {
2878 @@ -212,22 +202,15 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
2879 local_irq_disable(); /* force interrupt disable */
2882 -static int pal_halt = 1;
2883 static int can_do_pal_halt = 1;
2885 static int __init nohalt_setup(char * str)
2887 - pal_halt = can_do_pal_halt = 0;
2888 + can_do_pal_halt = 0;
2891 __setup("nohalt", nohalt_setup);
2894 -update_pal_halt_status(int status)
2896 - can_do_pal_halt = pal_halt && status;
2900 * We use this if we don't have any better idle routine..
2902 @@ -236,6 +219,22 @@ default_idle (void)
2905 while (!need_resched()) {
2906 +#ifdef CONFIG_PERFMON
2909 + * If requested, we stop the PMU to avoid
2910 + * measuring across the core idle loop.
2912 +			 * dcr.pp is not modified on purpose;
2913 +			 * it is used when coming out of
2914 +			 * safe_halt() via interrupt.
2916 + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) {
2917 + psr = ia64_getreg(_IA64_REG_PSR);
2918 + if (psr & IA64_PSR_PP)
2919 + ia64_rsm(IA64_PSR_PP);
2922 if (can_do_pal_halt) {
2923 local_irq_disable();
2924 if (!need_resched()) {
2925 @@ -244,6 +243,12 @@ default_idle (void)
2929 +#ifdef CONFIG_PERFMON
2930 + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) {
2931 + if (psr & IA64_PSR_PP)
2932 + ia64_ssm(IA64_PSR_PP);
2938 @@ -344,22 +349,9 @@ cpu_idle (void)
2940 ia64_save_extra (struct task_struct *task)
2942 -#ifdef CONFIG_PERFMON
2943 - unsigned long info;
2946 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
2947 ia64_save_debug_regs(&task->thread.dbr[0]);
2949 -#ifdef CONFIG_PERFMON
2950 - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
2951 - pfm_save_regs(task);
2953 - info = __get_cpu_var(pfm_syst_info);
2954 - if (info & PFM_CPUINFO_SYST_WIDE)
2955 - pfm_syst_wide_update_task(task, info, 0);
2958 #ifdef CONFIG_IA32_SUPPORT
2959 if (IS_IA32_PROCESS(task_pt_regs(task)))
2960 ia32_save_state(task);
2961 @@ -369,22 +361,9 @@ ia64_save_extra (struct task_struct *task)
2963 ia64_load_extra (struct task_struct *task)
2965 -#ifdef CONFIG_PERFMON
2966 - unsigned long info;
2969 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
2970 ia64_load_debug_regs(&task->thread.dbr[0]);
2972 -#ifdef CONFIG_PERFMON
2973 - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
2974 - pfm_load_regs(task);
2976 - info = __get_cpu_var(pfm_syst_info);
2977 - if (info & PFM_CPUINFO_SYST_WIDE)
2978 - pfm_syst_wide_update_task(task, info, 1);
2981 #ifdef CONFIG_IA32_SUPPORT
2982 if (IS_IA32_PROCESS(task_pt_regs(task)))
2983 ia32_load_state(task);
2984 @@ -510,8 +489,7 @@ copy_thread (int nr, unsigned long clone_flags,
2985 * call behavior where scratch registers are preserved across
2986 * system calls (unless used by the system call itself).
2988 -# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
2989 - | IA64_THREAD_PM_VALID)
2990 +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID)
2991 # define THREAD_FLAGS_TO_SET 0
2992 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
2993 | THREAD_FLAGS_TO_SET);
2994 @@ -533,10 +511,8 @@ copy_thread (int nr, unsigned long clone_flags,
2998 -#ifdef CONFIG_PERFMON
2999 - if (current->thread.pfm_context)
3000 - pfm_inherit(p, child_ptregs);
3002 + pfm_copy_thread(p);
3007 @@ -745,15 +721,13 @@ exit_thread (void)
3010 ia64_drop_fpu(current);
3011 -#ifdef CONFIG_PERFMON
3012 - /* if needed, stop monitoring and flush state to perfmon context */
3013 - if (current->thread.pfm_context)
3014 - pfm_exit_thread(current);
3016 + /* if needed, stop monitoring and flush state to perfmon context */
3017 + pfm_exit_thread();
3019 /* free debug register resources */
3020 - if (current->thread.flags & IA64_THREAD_DBG_VALID)
3021 - pfm_release_debug_registers(current);
3023 + pfm_release_dbregs(current);
3025 if (IS_IA32_PROCESS(task_pt_regs(current)))
3026 ia32_drop_ia64_partial_page_list(current);
3028 diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
3029 index 2a9943b..bb1ca1e 100644
3030 --- a/arch/ia64/kernel/ptrace.c
3031 +++ b/arch/ia64/kernel/ptrace.c
3033 #include <linux/security.h>
3034 #include <linux/audit.h>
3035 #include <linux/signal.h>
3036 +#include <linux/perfmon_kern.h>
3037 #include <linux/regset.h>
3038 #include <linux/elf.h>
3041 #include <asm/system.h>
3042 #include <asm/uaccess.h>
3043 #include <asm/unwind.h>
3044 -#ifdef CONFIG_PERFMON
3045 -#include <asm/perfmon.h>
3050 @@ -2124,7 +2122,6 @@ access_uarea(struct task_struct *child, unsigned long addr,
3051 "address 0x%lx\n", addr);
3054 -#ifdef CONFIG_PERFMON
3056 * Check if debug registers are used by perfmon. This
3057 * test must be done once we know that we can do the
3058 @@ -2142,9 +2139,8 @@ access_uarea(struct task_struct *child, unsigned long addr,
3059 * IA64_THREAD_DBG_VALID. The registers are restored
3060 * by the PMU context switch code.
3062 - if (pfm_use_debug_registers(child))
3063 + if (pfm_use_dbregs(child))
3067 if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
3068 child->thread.flags |= IA64_THREAD_DBG_VALID;
3069 diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
3070 index de636b2..677fa68 100644
3071 --- a/arch/ia64/kernel/setup.c
3072 +++ b/arch/ia64/kernel/setup.c
3074 #include <linux/cpufreq.h>
3075 #include <linux/kexec.h>
3076 #include <linux/crash_dump.h>
3077 +#include <linux/perfmon_kern.h>
3079 #include <asm/ia32.h>
3080 #include <asm/machvec.h>
3081 @@ -1051,6 +1052,8 @@ cpu_init (void)
3083 platform_cpu_init();
3084 pm_idle = default_idle;
3086 + pfm_init_percpu();
3090 diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
3091 index d8f05e5..3d7a739 100644
3092 --- a/arch/ia64/kernel/smpboot.c
3093 +++ b/arch/ia64/kernel/smpboot.c
3095 #include <linux/efi.h>
3096 #include <linux/percpu.h>
3097 #include <linux/bitops.h>
3098 +#include <linux/perfmon_kern.h>
3100 #include <asm/atomic.h>
3101 #include <asm/cache.h>
3102 @@ -381,10 +382,6 @@ smp_callin (void)
3103 extern void ia64_init_itm(void);
3104 extern volatile int time_keeper_id;
3106 -#ifdef CONFIG_PERFMON
3107 - extern void pfm_init_percpu(void);
3110 cpuid = smp_processor_id();
3111 phys_id = hard_smp_processor_id();
3112 itc_master = time_keeper_id;
3113 @@ -410,10 +407,6 @@ smp_callin (void)
3115 ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
3117 -#ifdef CONFIG_PERFMON
3118 - pfm_init_percpu();
3123 if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
3124 @@ -751,6 +744,7 @@ int __cpu_disable(void)
3125 cpu_clear(cpu, cpu_online_map);
3126 local_flush_tlb_all();
3127 cpu_clear(cpu, cpu_callin_map);
3128 + pfm_cpu_disable();
3132 diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
3133 index bcbb6d8..a0ed33a 100644
3134 --- a/arch/ia64/kernel/sys_ia64.c
3135 +++ b/arch/ia64/kernel/sys_ia64.c
3136 @@ -284,3 +284,11 @@ sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, un
3139 #endif /* CONFIG_PCI */
3141 +#ifndef CONFIG_IA64_PERFMON_COMPAT
3143 +sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
3148 diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
3149 index 98771e2..077fd09 100644
3150 --- a/arch/ia64/lib/Makefile
3151 +++ b/arch/ia64/lib/Makefile
3152 @@ -13,7 +13,6 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
3154 obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
3155 obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
3156 -lib-$(CONFIG_PERFMON) += carta_random.o
3159 AFLAGS___udivdi3.o = -DUNSIGNED
3160 diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
3161 index 125a602..892de6a 100644
3162 --- a/arch/ia64/oprofile/init.c
3163 +++ b/arch/ia64/oprofile/init.c
3165 #include <linux/init.h>
3166 #include <linux/errno.h>
3168 -extern int perfmon_init(struct oprofile_operations * ops);
3169 -extern void perfmon_exit(void);
3170 +extern int op_perfmon_init(struct oprofile_operations * ops);
3171 +extern void op_perfmon_exit(void);
3172 extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
3174 int __init oprofile_arch_init(struct oprofile_operations * ops)
3175 @@ -22,7 +22,7 @@ int __init oprofile_arch_init(struct oprofile_operations * ops)
3177 #ifdef CONFIG_PERFMON
3178 /* perfmon_init() can fail, but we have no way to report it */
3179 - ret = perfmon_init(ops);
3180 + ret = op_perfmon_init(ops);
3182 ops->backtrace = ia64_backtrace;
3184 @@ -33,6 +33,6 @@ int __init oprofile_arch_init(struct oprofile_operations * ops)
3185 void oprofile_arch_exit(void)
3187 #ifdef CONFIG_PERFMON
3189 + op_perfmon_exit();
3192 diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
3193 index bc41dd3..6fa9d17 100644
3194 --- a/arch/ia64/oprofile/perfmon.c
3195 +++ b/arch/ia64/oprofile/perfmon.c
3197 #include <linux/kernel.h>
3198 #include <linux/oprofile.h>
3199 #include <linux/sched.h>
3200 -#include <asm/perfmon.h>
3201 +#include <linux/module.h>
3202 +#include <linux/perfmon_kern.h>
3203 #include <asm/ptrace.h>
3204 #include <asm/errno.h>
3206 static int allow_ints;
3209 -perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
3210 - struct pt_regs *regs, unsigned long stamp)
3211 +perfmon_handler(struct pfm_context *ctx,
3212 + unsigned long ip, u64 stamp, void *data)
3214 - int event = arg->pmd_eventid;
3215 + struct pt_regs *regs;
3216 + struct pfm_ovfl_arg *arg;
3219 + arg = &ctx->ovfl_arg;
3221 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
3222 + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
3224 /* the owner of the oprofile event buffer may have exited
3225 * without perfmon being shutdown (e.g. SIGSEGV)
3228 - oprofile_add_sample(regs, event);
3229 + oprofile_add_sample(regs, arg->pmd_eventid);
3233 @@ -45,17 +50,13 @@ static void perfmon_stop(void)
3238 -#define OPROFILE_FMT_UUID { \
3239 - 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
3241 -static pfm_buffer_fmt_t oprofile_fmt = {
3242 - .fmt_name = "oprofile_format",
3243 - .fmt_uuid = OPROFILE_FMT_UUID,
3244 - .fmt_handler = perfmon_handler,
3245 +static struct pfm_smpl_fmt oprofile_fmt = {
3246 + .fmt_name = "OProfile",
3247 + .fmt_handler = perfmon_handler,
3248 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
3249 + .owner = THIS_MODULE
3253 static char * get_cpu_type(void)
3255 __u8 family = local_cpu_data->family;
3256 @@ -75,9 +76,9 @@ static char * get_cpu_type(void)
3258 static int using_perfmon;
3260 -int perfmon_init(struct oprofile_operations * ops)
3261 +int __init op_perfmon_init(struct oprofile_operations * ops)
3263 - int ret = pfm_register_buffer_fmt(&oprofile_fmt);
3264 + int ret = pfm_fmt_register(&oprofile_fmt);
3268 @@ -90,10 +91,10 @@ int perfmon_init(struct oprofile_operations * ops)
3272 -void perfmon_exit(void)
3273 +void __exit op_perfmon_exit(void)
3278 - pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
3279 + pfm_fmt_unregister(&oprofile_fmt);
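
The hunk above switches the oprofile bridge from the old UUID-keyed pfm_register_buffer_fmt()/pfm_unregister_buffer_fmt() calls to the new struct-based pfm_fmt_register()/pfm_fmt_unregister() API. As an illustration only, a minimal out-of-tree sampling format built against this new API could look like the sketch below; the "example" name and the handler body are hypothetical, and only the struct fields, handler signature and registration calls visible in the diff above are assumed.

	#include <linux/module.h>
	#include <linux/perfmon_kern.h>

	/* hypothetical overflow handler, following the signature used above */
	static int example_fmt_handler(struct pfm_context *ctx,
				       unsigned long ip, u64 stamp, void *data)
	{
		struct pfm_ovfl_arg *arg = &ctx->ovfl_arg;

		/* ask the core to reset the overflowed counters, as oprofile does */
		arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
		return 0;
	}

	static struct pfm_smpl_fmt example_fmt = {
		.fmt_name    = "example",
		.fmt_handler = example_fmt_handler,
		.owner       = THIS_MODULE,
	};

	static int __init example_fmt_init(void)
	{
		return pfm_fmt_register(&example_fmt);
	}

	static void __exit example_fmt_exit(void)
	{
		pfm_fmt_unregister(&example_fmt);
	}

	module_init(example_fmt_init);
	module_exit(example_fmt_exit);
	MODULE_LICENSE("GPL");
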
3281 diff --git a/arch/ia64/perfmon/Kconfig b/arch/ia64/perfmon/Kconfig
3282 new file mode 100644
3283 index 0000000..99c68bd
3285 +++ b/arch/ia64/perfmon/Kconfig
3287 +menu "Hardware Performance Monitoring support"
3289 + bool "Perfmon2 performance monitoring interface"
3292 + Enables the perfmon2 interface to access the hardware
3293 + performance counters. See <http://perfmon2.sf.net/> for
3296 +config PERFMON_DEBUG
3297 + bool "Perfmon debugging"
3299 + depends on PERFMON
3301 + Enables perfmon debugging support
3303 +config PERFMON_DEBUG_FS
3304 + bool "Enable perfmon statistics reporting via debugfs"
3306 + depends on PERFMON && DEBUG_FS
3308 + Enable collection and reporting of perfmon timing statistics under
3309 + debugfs. This is used for debugging and performance analysis of the
3310 + subsystem. The debugfs filesystem must be mounted.
3312 +config IA64_PERFMON_COMPAT
3313 + bool "Enable old perfmon-2 compatibility mode"
3315 + depends on PERFMON
3317 + Enable this option to allow performance tools which used the old
3318 + perfmon-2 interface to continue to work. Old tools are those using
3319 + the obsolete commands and arguments. Check your programs and look
3320 + in include/asm-ia64/perfmon_compat.h for more information.
3322 +config IA64_PERFMON_GENERIC
3323 + tristate "Generic IA-64 PMU support"
3324 + depends on PERFMON
3327 + Enables generic IA-64 PMU support.
3328 + The generic PMU is defined by the IA-64 architecture document.
3329 + This option should only be necessary when running with a PMU that
3330 + is not yet explicitly supported. Even then, there is no guarantee
3331 + that this support will work.
3333 +config IA64_PERFMON_ITANIUM
3334 + tristate "Itanium (Merced) Performance Monitoring support"
3335 + depends on PERFMON
3338 + Enables Itanium (Merced) PMU support.
3340 +config IA64_PERFMON_MCKINLEY
3341 + tristate "Itanium 2 (McKinley) Performance Monitoring support"
3342 + depends on PERFMON
3345 + Enables Itanium 2 (McKinley, Madison, Deerfield) PMU support.
3347 +config IA64_PERFMON_MONTECITO
3348 + tristate "Itanium 2 9000 (Montecito) Performance Monitoring support"
3349 + depends on PERFMON
3352 + Enables support for Itanium 2 9000 (Montecito) PMU.
3354 diff --git a/arch/ia64/perfmon/Makefile b/arch/ia64/perfmon/Makefile
3355 new file mode 100644
3356 index 0000000..c9cdf9f
3358 +++ b/arch/ia64/perfmon/Makefile
3361 +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
3362 +# Contributed by Stephane Eranian <eranian@hpl.hp.com>
3364 +obj-$(CONFIG_PERFMON) += perfmon.o
3365 +obj-$(CONFIG_IA64_PERFMON_COMPAT) += perfmon_default_smpl.o \
3367 +obj-$(CONFIG_IA64_PERFMON_GENERIC) += perfmon_generic.o
3368 +obj-$(CONFIG_IA64_PERFMON_ITANIUM) += perfmon_itanium.o
3369 +obj-$(CONFIG_IA64_PERFMON_MCKINLEY) += perfmon_mckinley.o
3370 +obj-$(CONFIG_IA64_PERFMON_MONTECITO) += perfmon_montecito.o
3371 diff --git a/arch/ia64/perfmon/perfmon.c b/arch/ia64/perfmon/perfmon.c
3372 new file mode 100644
3373 index 0000000..3f59410
3375 +++ b/arch/ia64/perfmon/perfmon.c
3378 + * This file implements the IA-64 specific
3379 + * support for the perfmon2 interface
3381 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
3382 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
3384 + * This program is free software; you can redistribute it and/or
3385 + * modify it under the terms of version 2 of the GNU General Public
3386 + * License as published by the Free Software Foundation.
3388 + * This program is distributed in the hope that it will be useful,
3389 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3390 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3391 + * General Public License for more details.
3393 + * You should have received a copy of the GNU General Public License
3394 + * along with this program; if not, write to the Free Software
3395 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
3398 +#include <linux/module.h>
3399 +#include <linux/perfmon_kern.h>
3401 +struct pfm_arch_session {
3402 + u32 pfs_sys_use_dbr; /* syswide session uses dbr */
3403 + u32 pfs_ptrace_use_dbr; /* a thread uses dbr via ptrace()*/
3406 +DEFINE_PER_CPU(u32, pfm_syst_info);
3408 +static struct pfm_arch_session pfm_arch_sessions;
3409 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_arch_sessions_lock);
3411 +static inline void pfm_clear_psr_pp(void)
3413 + ia64_rsm(IA64_PSR_PP);
3416 +static inline void pfm_set_psr_pp(void)
3418 + ia64_ssm(IA64_PSR_PP);
3421 +static inline void pfm_clear_psr_up(void)
3423 + ia64_rsm(IA64_PSR_UP);
3426 +static inline void pfm_set_psr_up(void)
3428 + ia64_ssm(IA64_PSR_UP);
3431 +static inline void pfm_set_psr_l(u64 val)
3433 + ia64_setreg(_IA64_REG_PSR_L, val);
3436 +static inline void pfm_restore_ibrs(u64 *ibrs, unsigned int nibrs)
3440 + for (i = 0; i < nibrs; i++) {
3441 + ia64_set_ibr(i, ibrs[i]);
3442 + ia64_dv_serialize_instruction();
3447 +static inline void pfm_restore_dbrs(u64 *dbrs, unsigned int ndbrs)
3451 + for (i = 0; i < ndbrs; i++) {
3452 + ia64_set_dbr(i, dbrs[i]);
3453 + ia64_dv_serialize_data();
3458 +irqreturn_t pmu_interrupt_handler(int irq, void *arg)
3460 + struct pt_regs *regs;
3461 + regs = get_irq_regs();
3463 + pfm_interrupt_handler(instruction_pointer(regs), regs);
3465 + return IRQ_HANDLED;
3467 +static struct irqaction perfmon_irqaction = {
3468 + .handler = pmu_interrupt_handler,
3469 + .flags = IRQF_DISABLED, /* means keep interrupts masked */
3473 +void pfm_arch_quiesce_pmu_percpu(void)
3477 + * make sure no measurement is active
3478 + * (may inherit programmed PMCs from EFI).
3480 + pfm_clear_psr_pp();
3481 + pfm_clear_psr_up();
3484 + * ensure dcr.pp is cleared
3486 + dcr = ia64_getreg(_IA64_REG_CR_DCR);
3487 + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
3490 + * we run with the PMU not frozen at all times
3492 + ia64_set_pmc(0, 0);
3496 +void pfm_arch_init_percpu(void)
3498 + pfm_arch_quiesce_pmu_percpu();
3500 + * program PMU interrupt vector
3502 + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
3506 +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
3508 + struct pfm_arch_context *ctx_arch;
3510 + ctx_arch = pfm_ctx_arch(ctx);
3512 + ctx_arch->flags.use_dbr = 0;
3513 + ctx_arch->flags.insecure = (ctx_flags & PFM_ITA_FL_INSECURE) ? 1: 0;
3515 + PFM_DBG("insecure=%d", ctx_arch->flags.insecure);
3521 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
3522 + * Context is locked. Interrupts are masked. Monitoring may be active.
3523 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
3526 + * non-zero : did not save PMDs (as part of stopping the PMU)
3527 + * 0 : saved PMDs (no need to save them in caller)
3529 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
3531 + struct pfm_arch_context *ctx_arch;
3532 + struct pfm_event_set *set;
3535 + ctx_arch = pfm_ctx_arch(ctx);
3536 + set = ctx->active_set;
3539 + * save current PSR: needed because we modify it
3542 + psr = ia64_getreg(_IA64_REG_PSR);
3545 + * stop monitoring:
3546 + * This is the last instruction which may generate an overflow
3548 + * we do not clear ipsr.up
3550 + pfm_clear_psr_up();
3554 + * extract overflow status bits
3556 + tmp = ia64_get_pmc(0) & ~0xf;
3559 + * keep a copy of psr.up (for reload)
3561 + ctx_arch->ctx_saved_psr_up = psr & IA64_PSR_UP;
3564 + * save overflow status bits
3566 + set->povfl_pmds[0] = tmp;
3569 + * record how many pending overflows
3570 + * XXX: assume identity mapping for counters
3572 + set->npend_ovfls = ia64_popcnt(tmp);
3575 + * make sure the PMU is unfrozen for the next task
3577 + if (set->npend_ovfls) {
3578 + ia64_set_pmc(0, 0);
3585 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
3586 + * set cannot be NULL. Context is locked. Interrupts are masked.
3587 + * Caller has already restored all PMD and PMC registers.
3589 + * must reactivate monitoring
3591 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
3593 + struct pfm_arch_context *ctx_arch;
3595 + ctx_arch = pfm_ctx_arch(ctx);
3598 + * when monitoring is not explicitly started
3599 + * then psr_up = 0, in which case we do not
3602 + if (likely(ctx_arch->ctx_saved_psr_up)) {
3608 +int pfm_arch_reserve_session(struct pfm_context *ctx, u32 cpu)
3610 + struct pfm_arch_context *ctx_arch;
3614 + ctx_arch = pfm_ctx_arch(ctx);
3615 + is_system = ctx->flags.system;
3617 + spin_lock(&pfm_arch_sessions_lock);
3619 + if (is_system && ctx_arch->flags.use_dbr) {
3620 + PFM_DBG("syswide context uses dbregs");
3622 + if (pfm_arch_sessions.pfs_ptrace_use_dbr) {
3623 + PFM_DBG("cannot reserve syswide context: "
3624 + "dbregs in use by ptrace");
3627 + pfm_arch_sessions.pfs_sys_use_dbr++;
3630 + spin_unlock(&pfm_arch_sessions_lock);
3635 +void pfm_arch_release_session(struct pfm_context *ctx, u32 cpu)
3637 + struct pfm_arch_context *ctx_arch;
3640 + ctx_arch = pfm_ctx_arch(ctx);
3641 + is_system = ctx->flags.system;
3643 + spin_lock(&pfm_arch_sessions_lock);
3645 + if (is_system && ctx_arch->flags.use_dbr)
3646 + pfm_arch_sessions.pfs_sys_use_dbr--;
3647 + spin_unlock(&pfm_arch_sessions_lock);
3651 + * function called from pfm_load_context_*(). Task is not guaranteed to be
3652 + * current task. If not then other task is guaranteed stopped and off any CPU.
3653 + * context is locked and interrupts are masked.
3655 + * On PFM_LOAD_CONTEXT, the interface guarantees monitoring is stopped.
3657 + * For system-wide task is NULL
3659 +int pfm_arch_load_context(struct pfm_context *ctx)
3661 + struct pfm_arch_context *ctx_arch;
3662 + struct pt_regs *regs;
3665 + ctx_arch = pfm_ctx_arch(ctx);
3668 + * cannot load a context which is using range restrictions
3669 + * into a thread that is being debugged.
3671 + * if one set out of several is using the debug registers, then
3672 + * we assume the context as a whole is using them.
3674 + if (ctx_arch->flags.use_dbr) {
3675 + if (ctx->flags.system) {
3676 + spin_lock(&pfm_arch_sessions_lock);
3678 + if (pfm_arch_sessions.pfs_ptrace_use_dbr) {
3679 + PFM_DBG("cannot reserve syswide context: "
3680 + "dbregs in use by ptrace");
3683 + pfm_arch_sessions.pfs_sys_use_dbr++;
3684 + PFM_DBG("pfs_sys_use_dbr=%u",
3685 + pfm_arch_sessions.pfs_sys_use_dbr);
3687 + spin_unlock(&pfm_arch_sessions_lock);
3689 + } else if (ctx->task->thread.flags & IA64_THREAD_DBG_VALID) {
3690 + PFM_DBG("load_pid [%d] thread is debugged, cannot "
3691 + "use range restrictions", ctx->task->pid);
3699 + * We need to intervene on context switch to toggle the
3700 + * psr.pp bit in system-wide. As such, we set the TIF
3701 + * flag so that pfm_arch_ctxswout_sys() and the
3702 + * pfm_arch_ctxswin_sys() functions get called
3703 + * from pfm_ctxsw_sys();
3705 + if (ctx->flags.system) {
3706 + set_thread_flag(TIF_PERFMON_CTXSW);
3707 + PFM_DBG("[%d] set TIF", current->pid);
3711 + regs = task_pt_regs(ctx->task);
3714 + * self-monitoring systematically allows user level control
3716 + if (ctx->task != current) {
3718 + * when not current, task is stopped, so this is safe
3720 + ctx_arch->ctx_saved_psr_up = 0;
3721 + ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
3723 + ctx_arch->flags.insecure = 1;
3726 + * allow user level control (start/stop/read pmd) if:
3727 + * - self-monitoring
3728 + * - requested at context creation (PFM_IA64_FL_INSECURE)
3730 + * There is no security hole with PFM_IA64_FL_INSECURE because,
3731 + * when not self-monitoring, the caller must have permission to
3732 + * attach to the task.
3734 + if (ctx_arch->flags.insecure) {
3735 + ia64_psr(regs)->sp = 0;
3736 + PFM_DBG("clearing psr.sp for [%d]", ctx->task->pid);
3741 +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
3743 +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH)
3744 +#define PFM_ITA_SETFL_BOTH_INTR (PFM_ITA_SETFL_INTR_ONLY|\
3745 + PFM_ITA_SETFL_EXCL_INTR)
3747 +/* exclude return value field */
3748 +#define PFM_SETFL_ALL_MASK (PFM_ITA_SETFL_BOTH_INTR \
3749 + | PFM_SETFL_BOTH_SWITCH \
3750 + | PFM_ITA_SETFL_IDLE_EXCL)
3752 + if ((flags & ~PFM_SETFL_ALL_MASK)) {
3753 + PFM_DBG("invalid flags=0x%x", flags);
3757 + if ((flags & PFM_ITA_SETFL_BOTH_INTR) == PFM_ITA_SETFL_BOTH_INTR) {
3758 + PFM_DBG("both excl intr and intr only are set");
3762 + if ((flags & PFM_ITA_SETFL_IDLE_EXCL) && !ctx->flags.system) {
3763 + PFM_DBG("idle exclude flag only for system-wide context");
3770 + * function called from pfm_unload_context_*(). Context is locked.
3771 + * interrupts are masked. task is not guaranteed to be current task.
3772 + * Access to PMU is not guaranteed.
3774 + * function must do whatever arch-specific action is required on unload
3777 + * called for both system-wide and per-thread. task is NULL for system-wide
3779 +void pfm_arch_unload_context(struct pfm_context *ctx)
3781 + struct pfm_arch_context *ctx_arch;
3782 + struct pt_regs *regs;
3784 + ctx_arch = pfm_ctx_arch(ctx);
3786 + if (ctx->flags.system) {
3788 + * disable context switch hook
3790 + clear_thread_flag(TIF_PERFMON_CTXSW);
3792 + if (ctx_arch->flags.use_dbr) {
3793 + spin_lock(&pfm_arch_sessions_lock);
3794 + pfm_arch_sessions.pfs_sys_use_dbr--;
3795 + PFM_DBG("sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr);
3796 + spin_unlock(&pfm_arch_sessions_lock);
3799 + regs = task_pt_regs(ctx->task);
3802 + * cancel user level control for per-task context
3804 + ia64_psr(regs)->sp = 1;
3805 + PFM_DBG("setting psr.sp for [%d]", ctx->task->pid);
3810 + * mask monitoring by setting the privilege level to 0
3811 + * we cannot use psr.pp/psr.up for this, it is controlled by
3814 +void pfm_arch_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
3816 + struct pfm_arch_pmu_info *arch_info;
3817 + unsigned long mask;
3820 + arch_info = pfm_pmu_info();
3822 + * as an optimization we look at the first 64 PMC
3823 + * registers only starting at PMC4.
3825 + mask = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR;
3826 + for (i = PFM_ITA_FCNTR; mask; i++, mask >>= 1) {
3827 + if (likely(mask & 0x1))
3828 + ia64_set_pmc(i, set->pmcs[i] & ~0xfUL);
3831 + * make changes visible
3837 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
3838 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
3839 + * context is locked. Interrupts are masked. set cannot be NULL.
3840 + * Access to the PMU is guaranteed.
3842 + * function must restore all PMD registers from set.
3844 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
3846 + struct pfm_arch_context *ctx_arch;
3847 + unsigned long *mask;
3850 + ctx_arch = pfm_ctx_arch(ctx);
3852 + if (ctx_arch->flags.insecure) {
3853 + num = ctx->regs.num_rw_pmd;
3854 + mask = ctx->regs.rw_pmds;
3856 + num = set->nused_pmds;
3857 + mask = set->used_pmds;
3860 + * must restore all implemented read-write PMDS to avoid leaking
3861 + * information especially when PFM_IA64_FL_INSECURE is set.
3863 + * XXX: should check PFM_IA64_FL_INSECURE==0 and use used_pmd instead
3865 + for (i = 0; num; i++) {
3866 + if (likely(test_bit(i, mask))) {
3867 + pfm_arch_write_pmd(ctx, i, set->pmds[i].value);
3875 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
3876 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
3877 + * context is locked. Interrupts are masked. set cannot be NULL.
3878 + * Access to the PMU is guaranteed.
3880 + * function must restore all PMC registers from set if needed
3882 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
3884 + struct pfm_arch_pmu_info *arch_info;
3885 + u64 mask2 = 0, val, plm;
3886 + unsigned long impl_mask, mask_pmcs;
3889 + arch_info = pfm_pmu_info();
3891 + * as an optimization we only look at the first 64
3892 + * PMC registers. In fact, we should never scan the
3893 + * entire impl_pmcs because ibr/dbr are implemented
3896 + * always skip PMC0-PMC3. PMC0 taken care of when saving
3897 + * state. PMC1-PMC3 not used until we get counters in
3898 + * the 60 and above index range.
3900 + impl_mask = ctx->regs.pmcs[0] >> PFM_ITA_FCNTR;
3901 + mask_pmcs = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR;
3902 + plm = ctx->state == PFM_CTX_MASKED ? ~0xf : ~0x0;
3904 + for (i = PFM_ITA_FCNTR;
3906 + i++, impl_mask >>= 1, mask_pmcs >>= 1) {
3907 + if (likely(impl_mask & 0x1)) {
3908 + mask2 = mask_pmcs & 0x1 ? plm : ~0;
3909 + val = set->pmcs[i] & mask2;
3910 + ia64_set_pmc(i, val);
3911 + PFM_DBG_ovfl("pmc%u=0x%lx", i, val);
3917 + if (set->priv_flags & PFM_ITA_SETFL_USE_DBR) {
3918 + pfm_restore_ibrs(set->pmcs+256, 8);
3919 + pfm_restore_dbrs(set->pmcs+264, 8);
3924 +void pfm_arch_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
3929 + is_system = ctx->flags.system;
3931 + psr = ia64_getreg(_IA64_REG_PSR);
3934 + * monitoring is masked via the PMC.plm
3936 + * As we restore their value, we do not want each counter to
3937 + * restart right away. We stop monitoring using the PSR,
3938 + * restore the PMC (and PMD) and then re-establish the psr
3939 + * as it was. Note that there can be no pending overflow at
3940 + * this point, because monitoring is still MASKED.
3942 + * Because interrupts are masked we can avoid changing
3946 + pfm_clear_psr_pp();
3948 + pfm_clear_psr_up();
3952 + pfm_arch_restore_pmcs(ctx, set);
3957 + * monitoring may start right now but interrupts
3958 + * are still masked
3960 + pfm_set_psr_l(psr);
3965 + * Called from pfm_stop()
3968 + * task is not necessarily current. If not current task, then
3969 + * task is guaranteed stopped and off any cpu. Access to PMU
3970 + * is not guaranteed. Interrupts are masked. Context is locked.
3971 + * Set is the active set.
3973 + * must disable active monitoring. ctx cannot be NULL
3975 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
3977 + struct pfm_arch_context *ctx_arch;
3978 + struct pt_regs *regs;
3981 + ctx_arch = pfm_ctx_arch(ctx);
3982 + regs = task_pt_regs(task);
3984 + if (!ctx->flags.system) {
3986 + * in ZOMBIE state we always have task == current due to
3987 + * pfm_exit_thread()
3989 + ia64_psr(regs)->up = 0;
3990 + ctx_arch->ctx_saved_psr_up = 0;
3993 + * in case of ZOMBIE state, there is no unload to clear
3994 + * insecure monitoring, so we do it in stop instead.
3996 + if (ctx->state == PFM_CTX_ZOMBIE)
3997 + ia64_psr(regs)->sp = 1;
3999 + if (task == current) {
4000 + pfm_clear_psr_up();
4003 + } else if (ctx->flags.started) { /* do not stop twice */
4004 + dcr = ia64_getreg(_IA64_REG_CR_DCR);
4005 + psr = ia64_getreg(_IA64_REG_PSR);
4007 + ia64_psr(regs)->pp = 0;
4008 + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
4009 + pfm_clear_psr_pp();
4012 + if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) {
4013 + PFM_DBG("disabling idle exclude");
4014 + __get_cpu_var(pfm_syst_info) &= ~PFM_ITA_CPUINFO_IDLE_EXCL;
4020 + * called from pfm_start()
4022 + * Interrupts are masked. Context is locked. Set is the active set.
4025 + * Task is not necessarily current. If not current task, then task
4026 + * is guaranteed stopped and off any cpu. No access to PMU is task
4029 + * For system-wide:
4030 + * task is always current
4032 + * must enable active monitoring.
4034 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
4036 + struct pfm_arch_context *ctx_arch;
4037 + struct pt_regs *regs;
4038 + u64 dcr, dcr_pp, psr_pp;
4041 + ctx_arch = pfm_ctx_arch(ctx);
4042 + regs = task_pt_regs(task);
4043 + flags = ctx->active_set->flags;
4048 + if (!ctx->flags.system) {
4050 + ia64_psr(regs)->up = 1;
4052 + if (task == current) {
4057 + * activate monitoring at next ctxswin
4059 + ctx_arch->ctx_saved_psr_up = IA64_PSR_UP;
4065 + * system-wide mode
4067 + dcr = ia64_getreg(_IA64_REG_CR_DCR);
4068 + if (flags & PFM_ITA_SETFL_INTR_ONLY) {
4071 + } else if (flags & PFM_ITA_SETFL_EXCL_INTR) {
4075 + dcr_pp = psr_pp = 1;
4077 + PFM_DBG("dcr_pp=%lu psr_pp=%lu", dcr_pp, psr_pp);
4080 + * update dcr_pp and psr_pp
4083 + ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP);
4085 + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
4089 + ia64_psr(regs)->pp = 1;
4091 + pfm_clear_psr_pp();
4092 + ia64_psr(regs)->pp = 0;
4096 + if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) {
4097 + PFM_DBG("enable idle exclude");
4098 + __get_cpu_var(pfm_syst_info) |= PFM_ITA_CPUINFO_IDLE_EXCL;
4103 + * Only call this function when a process is trying to
4104 + * write the debug registers (reading is always allowed)
4105 + * called from arch/ia64/kernel/ptrace.c:access_uarea()
4107 +int __pfm_use_dbregs(struct task_struct *task)
4109 + struct pfm_arch_context *ctx_arch;
4110 + struct pfm_context *ctx;
4111 + unsigned long flags;
4114 + PFM_DBG("called for [%d]", task->pid);
4116 + ctx = task->pfm_context;
4121 + if (task->thread.flags & IA64_THREAD_DBG_VALID) {
4122 + PFM_DBG("IA64_THREAD_DBG_VALID already set");
4126 + spin_lock_irqsave(&ctx->lock, flags);
4127 + ctx_arch = pfm_ctx_arch(ctx);
4129 + if (ctx_arch->flags.use_dbr == 1) {
4130 + PFM_DBG("PMU using dbregs already, no ptrace access");
4133 + spin_unlock_irqrestore(&ctx->lock, flags);
4138 + spin_lock(&pfm_arch_sessions_lock);
4141 + * We cannot allow setting breakpoints when system wide monitoring
4142 + * sessions are using the debug registers.
4144 + if (!pfm_arch_sessions.pfs_sys_use_dbr)
4145 + pfm_arch_sessions.pfs_ptrace_use_dbr++;
4149 + PFM_DBG("ptrace_use_dbr=%u sys_use_dbr=%u by [%d] ret = %d",
4150 + pfm_arch_sessions.pfs_ptrace_use_dbr,
4151 + pfm_arch_sessions.pfs_sys_use_dbr,
4154 + spin_unlock(&pfm_arch_sessions_lock);
4159 + * in UP, we need to check whether the current
4160 + * owner of the PMU is not using the debug registers
4161 + * for monitoring. Because we are using a lazy
4162 + * save on ctxswout, we must force a save in this
4163 + * case because the debug registers are being
4164 + * modified by another task. We save the current
4165 + * PMD registers, and clear ownership. In ctxswin,
4166 + * full state will be reloaded.
4168 + * Note: we overwrite task.
4170 + task = __get_cpu_var(pmu_owner);
4171 + ctx = __get_cpu_var(pmu_ctx);
4176 + ctx_arch = pfm_ctx_arch(ctx);
4178 + if (ctx_arch->flags.use_dbr)
4179 + pfm_save_pmds_release(ctx);
4185 + * This function is called for every task that exits with the
4186 + * IA64_THREAD_DBG_VALID set. This indicates a task which was
4187 + * able to use the debug registers for debugging purposes via
4188 + * ptrace(). Therefore we know it was not using them for
4189 + * performance monitoring, so we only decrement the number
4190 + * of "ptraced" debug register users to keep the count up to date
4192 +int __pfm_release_dbregs(struct task_struct *task)
4196 + spin_lock(&pfm_arch_sessions_lock);
4198 + if (pfm_arch_sessions.pfs_ptrace_use_dbr == 0) {
4199 + PFM_ERR("invalid release for [%d] ptrace_use_dbr=0", task->pid);
4202 + pfm_arch_sessions.pfs_ptrace_use_dbr--;
4205 + spin_unlock(&pfm_arch_sessions_lock);
4210 +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
4211 + struct pfm_event_set *set)
4213 + struct pfm_arch_context *ctx_arch;
4214 + struct task_struct *task;
4215 + struct thread_struct *thread;
4216 + int ret = 0, state;
4217 + int i, can_access_pmu = 0;
4218 + int is_loaded, is_system;
4220 + ctx_arch = pfm_ctx_arch(ctx);
4221 + state = ctx->state;
4223 + is_loaded = state == PFM_CTX_LOADED || state == PFM_CTX_MASKED;
4224 + is_system = ctx->flags.system;
4225 + can_access_pmu = __get_cpu_var(pmu_owner) == task || is_system;
4227 + if (is_loaded == 0)
4230 + if (is_system == 0) {
4231 + thread = &(task->thread);
4234 + * cannot use debug registers for monitoring if they are
4235 + * already used for debugging
4237 + if (thread->flags & IA64_THREAD_DBG_VALID) {
4238 + PFM_DBG("debug registers already in use for [%d]",
4245 + * check for debug registers in system wide mode
4247 + spin_lock(&pfm_arch_sessions_lock);
4250 + if (pfm_arch_sessions.pfs_ptrace_use_dbr)
4253 + pfm_arch_sessions.pfs_sys_use_dbr++;
4256 + spin_unlock(&pfm_arch_sessions_lock);
4262 + * clear hardware registers to make sure we don't
4263 + * pick up stale state.
4265 + if (can_access_pmu) {
4266 + PFM_DBG("clearing ibrs, dbrs");
4267 + for (i = 0; i < 8; i++) {
4268 + ia64_set_ibr(i, 0);
4269 + ia64_dv_serialize_instruction();
4272 + for (i = 0; i < 8; i++) {
4273 + ia64_set_dbr(i, 0);
4274 + ia64_dv_serialize_data();
4280 + * debug registers are now in use
4282 + ctx_arch->flags.use_dbr = 1;
4283 + set->priv_flags |= PFM_ITA_SETFL_USE_DBR;
4284 + PFM_DBG("set%u use_dbr=1", set->id);
4287 +EXPORT_SYMBOL(pfm_ia64_mark_dbregs_used);
4289 +char *pfm_arch_get_pmu_module_name(void)
4291 + switch (local_cpu_data->family) {
4293 + return "perfmon_itanium";
4295 + return "perfmon_mckinley";
4297 + return "perfmon_montecito";
4299 + return "perfmon_generic";
4305 + * global arch-specific initialization, called only once
4307 +int __init pfm_arch_init(void)
4311 + spin_lock_init(&pfm_arch_sessions_lock);
4313 +#ifdef CONFIG_IA64_PERFMON_COMPAT
4314 + ret = pfm_ia64_compat_init();
4318 + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
4323 diff --git a/arch/ia64/perfmon/perfmon_compat.c b/arch/ia64/perfmon/perfmon_compat.c
4324 new file mode 100644
4325 index 0000000..2fd3d3c
4327 +++ b/arch/ia64/perfmon/perfmon_compat.c
4330 + * This file implements the IA-64 specific
4331 + * support for the perfmon2 interface
4333 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
4334 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
4336 + * This program is free software; you can redistribute it and/or
4337 + * modify it under the terms of version 2 of the GNU General Public
4338 + * License as published by the Free Software Foundation.
4340 + * This program is distributed in the hope that it will be useful,
4341 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4342 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4343 + * General Public License for more details.
4345 + * You should have received a copy of the GNU General Public License
4346 + * along with this program; if not, write to the Free Software
4347 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
4350 +#include <linux/interrupt.h>
4351 +#include <linux/module.h>
4352 +#include <linux/file.h>
4353 +#include <linux/fdtable.h>
4354 +#include <linux/seq_file.h>
4355 +#include <linux/vmalloc.h>
4356 +#include <linux/proc_fs.h>
4357 +#include <linux/perfmon_kern.h>
4358 +#include <linux/uaccess.h>
4360 +asmlinkage long sys_pfm_stop(int fd);
4361 +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *st);
4362 +asmlinkage long sys_pfm_unload_context(int fd);
4363 +asmlinkage long sys_pfm_restart(int fd);
4364 +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ld);
4366 +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what);
4368 +extern ssize_t __pfm_read(struct pfm_context *ctx,
4369 + union pfarg_msg *msg_buf,
4372 + * function providing some help for backward compatibility with old IA-64
4373 + * applications. In the old model, certain attributes of a counter were
4374 + * passed via the PMC, now they are passed via the PMD.
4376 +static int pfm_compat_update_pmd(struct pfm_context *ctx, u16 set_id, u16 cnum,
4378 + unsigned long *smpl_pmds,
4379 + unsigned long *reset_pmds,
4382 + struct pfm_event_set *set;
4384 + unsigned long *impl_pmds;
4388 + impl_pmds = ctx->regs.pmds;
4389 + max_pmd = ctx->regs.max_pmd;
4392 + * given that we do not maintain PMC->PMD dependencies
4393 + * we cannot figure out what to do in case PMCxx != PMDxx
4395 + if (cnum > max_pmd)
4399 + * assumes PMCxx controls PMDxx which is always true for counters
4400 + * on Itanium PMUs.
4402 + is_counting = pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64;
4403 + set = pfm_find_set(ctx, set_id, 0);
4406 + * for v2.0, we only allowed counting PMD to generate
4407 + * user-level notifications. Same thing with randomization.
4409 + if (is_counting) {
4410 + if (rflags & PFM_REGFL_OVFL_NOTIFY)
4411 + flags |= PFM_REGFL_OVFL_NOTIFY;
4412 + if (rflags & PFM_REGFL_RANDOM)
4413 + flags |= PFM_REGFL_RANDOM;
4415 + * verify validity of smpl_pmds
4417 + if (unlikely(bitmap_subset(smpl_pmds,
4418 + impl_pmds, max_pmd) == 0)) {
4419 + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u",
4420 + (unsigned long long)smpl_pmds[0], cnum);
4424 + * verify validity of reset_pmds
4426 + if (unlikely(bitmap_subset(reset_pmds,
4427 + impl_pmds, max_pmd) == 0)) {
4428 + PFM_DBG("invalid reset_pmds=0x%lx for pmd%u",
4429 + reset_pmds[0], cnum);
4433 + * ensures that a PFM_READ_PMDS succeeds with a
4434 + * corresponding PFM_WRITE_PMDS
4436 + __set_bit(cnum, set->used_pmds);
4438 + } else if (rflags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
4439 + PFM_DBG("cannot set ovfl_notify or random on pmd%u", cnum);
4443 + set->pmds[cnum].flags = flags;
4445 + if (is_counting) {
4446 + bitmap_copy(set->pmds[cnum].reset_pmds,
4450 + bitmap_copy(set->pmds[cnum].smpl_pmds,
4454 + set->pmds[cnum].eventid = eventid;
4457 + * update ovfl_notify
4459 + if (rflags & PFM_REGFL_OVFL_NOTIFY)
4460 + __set_bit(cnum, set->ovfl_notify);
4462 + __clear_bit(cnum, set->ovfl_notify);
4465 + PFM_DBG("pmd%u flags=0x%x eventid=0x%lx r_pmds=0x%lx s_pmds=0x%lx",
4475 +int __pfm_write_ibrs_old(struct pfm_context *ctx, void *arg, int count)
4477 + struct pfarg_dbreg *req = arg;
4478 + struct pfarg_pmc pmc;
4481 + memset(&pmc, 0, sizeof(pmc));
4483 + for (i = 0; i < count; i++, req++) {
4484 + pmc.reg_num = 256+req->dbreg_num;
4485 + pmc.reg_value = req->dbreg_value;
4486 + pmc.reg_flags = 0;
4487 + pmc.reg_set = req->dbreg_set;
4489 + ret = __pfm_write_pmcs(ctx, &pmc, 1);
4491 + req->dbreg_flags &= ~PFM_REG_RETFL_MASK;
4492 + req->dbreg_flags |= pmc.reg_flags;
4500 +static long pfm_write_ibrs_old(int fd, void __user *ureq, int count)
4502 + struct pfm_context *ctx;
4503 + struct task_struct *task;
4504 + struct file *filp;
4505 + struct pfarg_dbreg *req = NULL;
4506 + void *fptr, *resume;
4507 + unsigned long flags;
4509 + int ret, fput_needed;
4511 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4514 + sz = count*sizeof(*req);
4516 + filp = fget_light(fd, &fput_needed);
4517 + if (unlikely(filp == NULL)) {
4518 + PFM_DBG("invalid fd %d", fd);
4522 + ctx = filp->private_data;
4525 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4526 + PFM_DBG("fd %d not related to perfmon", fd);
4530 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4534 + spin_lock_irqsave(&ctx->lock, flags);
4538 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4540 + ret = __pfm_write_ibrs_old(ctx, req, count);
4542 + spin_unlock_irqrestore(&ctx->lock, flags);
4545 + pfm_resume_task(task, resume);
4547 + if (copy_to_user(ureq, req, sz))
4552 + fput_light(filp, fput_needed);
4556 +int __pfm_write_dbrs_old(struct pfm_context *ctx, void *arg, int count)
4558 + struct pfarg_dbreg *req = arg;
4559 + struct pfarg_pmc pmc;
4562 + memset(&pmc, 0, sizeof(pmc));
4564 + for (i = 0; i < count; i++, req++) {
4565 + pmc.reg_num = 264+req->dbreg_num;
4566 + pmc.reg_value = req->dbreg_value;
4567 + pmc.reg_flags = 0;
4568 + pmc.reg_set = req->dbreg_set;
4570 + ret = __pfm_write_pmcs(ctx, &pmc, 1);
4572 + req->dbreg_flags &= ~PFM_REG_RETFL_MASK;
4573 + req->dbreg_flags |= pmc.reg_flags;
4580 +static long pfm_write_dbrs_old(int fd, void __user *ureq, int count)
4582 + struct pfm_context *ctx;
4583 + struct task_struct *task;
4584 + struct file *filp;
4585 + struct pfarg_dbreg *req = NULL;
4586 + void *fptr, *resume;
4587 + unsigned long flags;
4589 + int ret, fput_needed;
4591 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4594 + sz = count*sizeof(*req);
4596 + filp = fget_light(fd, &fput_needed);
4597 + if (unlikely(filp == NULL)) {
4598 + PFM_DBG("invalid fd %d", fd);
4602 + ctx = filp->private_data;
4605 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4606 + PFM_DBG("fd %d not related to perfmon", fd);
4610 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4614 + spin_lock_irqsave(&ctx->lock, flags);
4618 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4620 + ret = __pfm_write_dbrs_old(ctx, req, count);
4622 + spin_unlock_irqrestore(&ctx->lock, flags);
4625 + pfm_resume_task(task, resume);
4627 + if (copy_to_user(ureq, req, sz))
4632 + fput_light(filp, fput_needed);
4636 +int __pfm_write_pmcs_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
4639 + struct pfarg_pmc req;
4641 + int ret, error_code;
4643 + memset(&req, 0, sizeof(req));
4645 + for (i = 0; i < count; i++, req_old++) {
4646 + req.reg_num = req_old->reg_num;
4647 + req.reg_set = req_old->reg_set;
4648 + req.reg_flags = 0;
4649 + req.reg_value = req_old->reg_value;
4651 + ret = __pfm_write_pmcs(ctx, (void *)&req, 1);
4652 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4653 + req_old->reg_flags |= req.reg_flags;
4658 + ret = pfm_compat_update_pmd(ctx, req_old->reg_set,
4660 + (u32)req_old->reg_flags,
4661 + req_old->reg_smpl_pmds,
4662 + req_old->reg_reset_pmds,
4663 + req_old->reg_smpl_eventid);
4665 + error_code = ret ? PFM_REG_RETFL_EINVAL : 0;
4666 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4667 + req_old->reg_flags |= error_code;
4675 +static long pfm_write_pmcs_old(int fd, void __user *ureq, int count)
4677 + struct pfm_context *ctx;
4678 + struct task_struct *task;
4679 + struct file *filp;
4680 + struct pfarg_reg *req = NULL;
4681 + void *fptr, *resume;
4682 + unsigned long flags;
4684 + int ret, fput_needed;
4686 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4689 + sz = count*sizeof(*req);
4691 + filp = fget_light(fd, &fput_needed);
4692 + if (unlikely(filp == NULL)) {
4693 + PFM_DBG("invalid fd %d", fd);
4697 + ctx = filp->private_data;
4700 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4701 + PFM_DBG("fd %d not related to perfmon", fd);
4705 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4709 + spin_lock_irqsave(&ctx->lock, flags);
4713 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4715 + ret = __pfm_write_pmcs_old(ctx, req, count);
4717 + spin_unlock_irqrestore(&ctx->lock, flags);
4720 + pfm_resume_task(task, resume);
4722 + if (copy_to_user(ureq, req, sz))
4728 + fput_light(filp, fput_needed);
4732 +int __pfm_write_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
4735 + struct pfarg_pmd req;
4738 + memset(&req, 0, sizeof(req));
4740 + for (i = 0; i < count; i++, req_old++) {
4741 + req.reg_num = req_old->reg_num;
4742 + req.reg_set = req_old->reg_set;
4743 + req.reg_value = req_old->reg_value;
4744 + /* flags passed with pmcs in v2.0 */
4746 + req.reg_long_reset = req_old->reg_long_reset;
4747 + req.reg_short_reset = req_old->reg_short_reset;
4748 + req.reg_random_mask = req_old->reg_random_mask;
4750 + * reg_random_seed is ignored since v2.3
4754 + * skip last_reset_val not used for writing
4755 + * skip smpl_pmds, reset_pmds, eventid, ovfl_switch_cnt
4756 + * as set in pfm_write_pmcs_old.
4758 + * ovfl_switch_cnt ignored, not implemented in v2.0
4760 + ret = __pfm_write_pmds(ctx, (void *)&req, 1, 1);
4762 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4763 + req_old->reg_flags |= req.reg_flags;
4771 +static long pfm_write_pmds_old(int fd, void __user *ureq, int count)
4773 + struct pfm_context *ctx;
4774 + struct task_struct *task;
4775 + struct file *filp;
4776 + struct pfarg_reg *req = NULL;
4777 + void *fptr, *resume;
4778 + unsigned long flags;
4780 + int ret, fput_needed;
4782 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4785 + sz = count*sizeof(*req);
4787 + filp = fget_light(fd, &fput_needed);
4788 + if (unlikely(filp == NULL)) {
4789 + PFM_DBG("invalid fd %d", fd);
4793 + ctx = filp->private_data;
4796 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4797 + PFM_DBG("fd %d not related to perfmon", fd);
4801 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4805 + spin_lock_irqsave(&ctx->lock, flags);
4809 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4811 + ret = __pfm_write_pmds_old(ctx, req, count);
4813 + spin_unlock_irqrestore(&ctx->lock, flags);
4815 + if (copy_to_user(ureq, req, sz))
4819 + pfm_resume_task(task, resume);
4823 + fput_light(filp, fput_needed);
4827 +int __pfm_read_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
4830 + struct pfarg_pmd req;
4833 + memset(&req, 0, sizeof(req));
4835 + for (i = 0; i < count; i++, req_old++) {
4836 + req.reg_num = req_old->reg_num;
4837 + req.reg_set = req_old->reg_set;
4839 + /* skip value not used for reading */
4840 + req.reg_flags = req_old->reg_flags;
4842 + /* skip short/long_reset not used for reading */
4843 + /* skip last_reset_val not used for reading */
4844 + /* skip ovfl_switch_cnt not used for reading */
4846 + ret = __pfm_read_pmds(ctx, (void *)&req, 1);
4848 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4849 + req_old->reg_flags |= req.reg_flags;
4853 + /* update fields */
4854 + req_old->reg_value = req.reg_value;
4856 + req_old->reg_last_reset_val = req.reg_last_reset_val;
4857 + req_old->reg_ovfl_switch_cnt = req.reg_ovfl_switch_cnt;
4862 +static long pfm_read_pmds_old(int fd, void __user *ureq, int count)
4864 + struct pfm_context *ctx;
4865 + struct task_struct *task;
4866 + struct file *filp;
4867 + struct pfarg_reg *req = NULL;
4868 + void *fptr, *resume;
4869 + unsigned long flags;
4871 + int ret, fput_needed;
4873 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4876 + sz = count*sizeof(*req);
4878 + filp = fget_light(fd, &fput_needed);
4879 + if (unlikely(filp == NULL)) {
4880 + PFM_DBG("invalid fd %d", fd);
4884 + ctx = filp->private_data;
4887 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4888 + PFM_DBG("fd %d not related to perfmon", fd);
4892 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4896 + spin_lock_irqsave(&ctx->lock, flags);
4900 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4902 + ret = __pfm_read_pmds_old(ctx, req, count);
4904 + spin_unlock_irqrestore(&ctx->lock, flags);
4907 + pfm_resume_task(task, resume);
4909 + if (copy_to_user(ureq, req, sz))
4914 + fput_light(filp, fput_needed);
4919 + * OBSOLETE: use /proc/perfmon_map instead
4921 +static long pfm_get_default_pmcs_old(int fd, void __user *ureq, int count)
4923 + struct pfarg_reg *req = NULL;
4927 + unsigned int cnum;
4933 + * ensure the pfm_pmu_conf does not disappear while
4936 + ret = pfm_pmu_conf_get(1);
4940 + sz = count*sizeof(*ureq);
4942 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4947 + for (i = 0; i < count; i++, req++) {
4948 + cnum = req->reg_num;
4950 + if (i >= PFM_MAX_PMCS ||
4951 + (pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_I) == 0) {
4952 + req->reg_flags = PFM_REG_RETFL_EINVAL;
4955 + req->reg_value = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
4956 + req->reg_flags = 0;
4958 + PFM_DBG("pmc[%u]=0x%lx", cnum, req->reg_value);
4961 + if (copy_to_user(ureq, req, sz))
4966 + pfm_pmu_conf_put();
4972 + * allocate a sampling buffer and remaps it into the user address space of
4973 + * the task. This is only in compatibility mode
4975 + * function called ONLY on current task
4977 +int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, size_t rsize,
4978 + struct file *filp)
4980 + struct mm_struct *mm = current->mm;
4981 + struct vm_area_struct *vma = NULL;
4982 + struct pfm_arch_context *ctx_arch;
4985 + extern struct vm_operations_struct pfm_buf_map_vm_ops;
4987 + ctx_arch = pfm_ctx_arch(ctx);
4990 + * allocate buffer + map desc
4992 + ret = pfm_smpl_buf_alloc(ctx, rsize);
4996 + size = ctx->smpl_size;
4999 + /* allocate vma */
5000 + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
5002 + PFM_DBG("Cannot allocate vma");
5005 + memset(vma, 0, sizeof(*vma));
5008 + * partially initialize the vma for the sampling buffer
5011 + vma->vm_flags = VM_READ | VM_MAYREAD | VM_RESERVED;
5012 + vma->vm_page_prot = PAGE_READONLY;
5013 + vma->vm_ops = &pfm_buf_map_vm_ops;
5014 + vma->vm_file = filp;
5015 + vma->vm_private_data = ctx;
5016 + vma->vm_pgoff = 0;
5019 + * simulate effect of mmap()
5024 + * Let's do the difficult operations next.
5026 + * now we atomically find some area in the address space and
5027 + * remap the buffer into it.
5029 + down_write(&current->mm->mmap_sem);
5031 + /* find some free area in address space, must have mmap sem held */
5032 + vma->vm_start = get_unmapped_area(NULL, 0, size, 0,
5033 + MAP_PRIVATE|MAP_ANONYMOUS);
5034 + if (vma->vm_start == 0) {
5035 + PFM_DBG("cannot find unmapped area of size %zu", size);
5036 + up_write(&current->mm->mmap_sem);
5039 + vma->vm_end = vma->vm_start + size;
5041 + PFM_DBG("aligned_size=%zu mapped @0x%lx", size, vma->vm_start);
5043 + * now insert the vma in the vm list for the process, must be
5044 + * done with mmap lock held
5046 + insert_vm_struct(mm, vma);
5048 + mm->total_vm += size >> PAGE_SHIFT;
5050 + up_write(&current->mm->mmap_sem);
5053 + * IMPORTANT: we do not issue the fput()
5054 + * because we want to increase the ref count
5055 + * on the descriptor to simulate what mmap()
5060 + * used to propagate vaddr to syscall stub
5062 + ctx_arch->ctx_smpl_vaddr = (void *)vma->vm_start;
5066 + kmem_cache_free(vm_area_cachep, vma);
5068 + pfm_smpl_buf_space_release(ctx, ctx->smpl_size);
5069 + vfree(ctx->smpl_addr);
5073 +#define PFM_DEFAULT_SMPL_UUID { \
5074 + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
5075 + 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
5077 +static pfm_uuid_t old_default_uuid = PFM_DEFAULT_SMPL_UUID;
5078 +static pfm_uuid_t null_uuid;
5081 + * function invoked in case pfm_context_create fails
5082 + * at the last operation (copy_to_user). It needs to
5083 + * undo memory allocations and free the file descriptor.
5085 +static void pfm_undo_create_context_fd(int fd, struct pfm_context *ctx)
5087 + struct files_struct *files = current->files;
5088 + struct file *file;
5091 + file = fget_light(fd, &fput_needed);
5093 + * there is no fd_uninstall(), so we do it
5094 + * here. put_unused_fd() does not remove the
5095 + * effect of fd_install().
5098 + spin_lock(&files->file_lock);
5099 + files->fd_array[fd] = NULL;
5100 + spin_unlock(&files->file_lock);
5102 + fput_light(file, fput_needed);
5105 + * decrement ref count and kill file
5109 + put_unused_fd(fd);
5111 + pfm_free_context(ctx);
5114 +static int pfm_get_smpl_arg_old(pfm_uuid_t uuid, void __user *fmt_uarg,
5115 + size_t usize, void **arg,
5116 + struct pfm_smpl_fmt **fmt)
5118 + struct pfm_smpl_fmt *f;
5119 + void *addr = NULL;
5123 + if (!memcmp(uuid, null_uuid, sizeof(pfm_uuid_t)))
5126 + if (memcmp(uuid, old_default_uuid, sizeof(pfm_uuid_t))) {
5127 + PFM_DBG("compatibility mode supports only default sampling format");
5131 + * find fmt and increase refcount
5133 + f = pfm_smpl_fmt_get("default-old");
5135 + PFM_DBG("default-old buffer format not found");
5140 + * expected format argument size
5142 + sz = f->fmt_arg_size;
5145 + * check user size matches expected size
5146 + * usize = -1 is for IA-64 backward compatibility
5149 + if (sz != usize && usize != -1) {
5150 + PFM_DBG("invalid arg size %zu, format expects %zu",
5156 + addr = kmalloc(sz, GFP_KERNEL);
5161 + if (copy_from_user(addr, fmt_uarg, sz))
5170 + pfm_smpl_fmt_put(f);
5174 +static long pfm_create_context_old(int fd, void __user *ureq, int count)
5176 + struct pfm_context *new_ctx;
5177 + struct pfm_arch_context *ctx_arch;
5178 + struct pfm_smpl_fmt *fmt = NULL;
5179 + struct pfarg_context req_old;
5180 + void __user *usmpl_arg;
5181 + void *smpl_arg = NULL;
5182 + struct pfarg_ctx req;
5188 + if (copy_from_user(&req_old, ureq, sizeof(req_old)))
5191 + memset(&req, 0, sizeof(req));
5194 + * sampling format args are following pfarg_context
5196 + usmpl_arg = ureq+sizeof(req_old);
5198 + ret = pfm_get_smpl_arg_old(req_old.ctx_smpl_buf_id, usmpl_arg, -1,
5203 + req.ctx_flags = req_old.ctx_flags;
5206 + * returns file descriptor if >=0, or error code */
5207 + ret = __pfm_create_context(&req, fmt, smpl_arg, PFM_COMPAT, &new_ctx);
5209 + ctx_arch = pfm_ctx_arch(new_ctx);
5210 + req_old.ctx_fd = ret;
5211 + req_old.ctx_smpl_vaddr = ctx_arch->ctx_smpl_vaddr;
5214 + if (copy_to_user(ureq, &req_old, sizeof(req_old))) {
5215 + pfm_undo_create_context_fd(req_old.ctx_fd, new_ctx);
5225 + * obsolete call: use /proc/perfmon
5227 +static long pfm_get_features_old(int fd, void __user *arg, int count)
5229 + struct pfarg_features req;
5235 + memset(&req, 0, sizeof(req));
5237 + req.ft_version = PFM_VERSION;
5239 + if (copy_to_user(arg, &req, sizeof(req)))
5245 +static long pfm_debug_old(int fd, void __user *arg, int count)
5252 + if (get_user(m, (int __user *)arg))
5256 + pfm_controls.debug = m == 0 ? 0 : 1;
5258 + PFM_INFO("debugging %s (timing reset)",
5259 + pfm_controls.debug ? "on" : "off");
5262 + for_each_online_cpu(m) {
5263 + memset(&per_cpu(pfm_stats, m), 0,
5264 + sizeof(struct pfm_stats));
5269 +static long pfm_unload_context_old(int fd, void __user *arg, int count)
5274 + return sys_pfm_unload_context(fd);
5277 +static long pfm_restart_old(int fd, void __user *arg, int count)
5282 + return sys_pfm_restart(fd);
5285 +static long pfm_stop_old(int fd, void __user *arg, int count)
5290 + return sys_pfm_stop(fd);
5293 +static long pfm_start_old(int fd, void __user *arg, int count)
5298 + return sys_pfm_start(fd, arg);
5301 +static long pfm_load_context_old(int fd, void __user *ureq, int count)
5306 + return sys_pfm_load_context(fd, ureq);
5310 + * perfmon command descriptions
5312 +struct pfm_cmd_desc {
5313 + long (*cmd_func)(int fd, void __user *arg, int count);
5317 + * functions MUST be listed in the increasing order of
5318 + * their index (see perfmon.h)
5320 +#define PFM_CMD(name) \
5321 + { .cmd_func = name, \
5323 +#define PFM_CMD_NONE \
5324 + { .cmd_func = NULL \
5327 +static struct pfm_cmd_desc pfm_cmd_tab[] = {
5328 +/* 0 */PFM_CMD_NONE,
5329 +/* 1 */PFM_CMD(pfm_write_pmcs_old),
5330 +/* 2 */PFM_CMD(pfm_write_pmds_old),
5331 +/* 3 */PFM_CMD(pfm_read_pmds_old),
5332 +/* 4 */PFM_CMD(pfm_stop_old),
5333 +/* 5 */PFM_CMD(pfm_start_old),
5334 +/* 6 */PFM_CMD_NONE,
5335 +/* 7 */PFM_CMD_NONE,
5336 +/* 8 */PFM_CMD(pfm_create_context_old),
5337 +/* 9 */PFM_CMD_NONE,
5338 +/* 10 */PFM_CMD(pfm_restart_old),
5339 +/* 11 */PFM_CMD_NONE,
5340 +/* 12 */PFM_CMD(pfm_get_features_old),
5341 +/* 13 */PFM_CMD(pfm_debug_old),
5342 +/* 14 */PFM_CMD_NONE,
5343 +/* 15 */PFM_CMD(pfm_get_default_pmcs_old),
5344 +/* 16 */PFM_CMD(pfm_load_context_old),
5345 +/* 17 */PFM_CMD(pfm_unload_context_old),
5346 +/* 18 */PFM_CMD_NONE,
5347 +/* 19 */PFM_CMD_NONE,
5348 +/* 20 */PFM_CMD_NONE,
5349 +/* 21 */PFM_CMD_NONE,
5350 +/* 22 */PFM_CMD_NONE,
5351 +/* 23 */PFM_CMD_NONE,
5352 +/* 24 */PFM_CMD_NONE,
5353 +/* 25 */PFM_CMD_NONE,
5354 +/* 26 */PFM_CMD_NONE,
5355 +/* 27 */PFM_CMD_NONE,
5356 +/* 28 */PFM_CMD_NONE,
5357 +/* 29 */PFM_CMD_NONE,
5358 +/* 30 */PFM_CMD_NONE,
5359 +/* 31 */PFM_CMD_NONE,
5360 +/* 32 */PFM_CMD(pfm_write_ibrs_old),
5361 +/* 33 */PFM_CMD(pfm_write_dbrs_old),
5363 +#define PFM_CMD_COUNT ARRAY_SIZE(pfm_cmd_tab)
5366 + * system-call entry point (must return long)
5368 +asmlinkage long sys_perfmonctl(int fd, int cmd, void __user *arg, int count)
5370 + if (perfmon_disabled)
5373 + if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT
5374 + || pfm_cmd_tab[cmd].cmd_func == NULL)) {
5375 + PFM_DBG("invalid cmd=%d", cmd);
5378 + return (long)pfm_cmd_tab[cmd].cmd_func(fd, arg, count);
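
For reference, an unmodified perfmon v2.0 tool reaches this entry point through the perfmonctl() wrapper using one of the command indices listed in pfm_cmd_tab[] above. The fragment below is an illustrative userspace sketch only; it assumes the historical v2.0 definitions (pfarg_context_t, pfarg_reg_t, pfarg_load_t, the PFM_* command constants and the perfmonctl() prototype) come from the old IA-64 perfmon headers, which are not part of this patch.

	/* illustrative sketch: old-style self-monitoring via the compat syscall */
	#include <perfmon/perfmon.h>	/* assumed location of the old v2.0 API */
	#include <string.h>
	#include <unistd.h>
	#include <stdio.h>

	int main(void)
	{
		pfarg_context_t ctx;
		pfarg_reg_t pc, pd;
		pfarg_load_t load;
		int fd;

		memset(&ctx, 0, sizeof(ctx));
		if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) < 0)
			return 1;
		fd = ctx.ctx_fd;	/* serviced by pfm_create_context_old() */

		memset(&pc, 0, sizeof(pc));
		memset(&pd, 0, sizeof(pd));
		pc.reg_num = 4;		/* PMC4/PMD4 pair on Itanium PMUs */
		pd.reg_num = 4;
		perfmonctl(fd, PFM_WRITE_PMCS, &pc, 1);
		perfmonctl(fd, PFM_WRITE_PMDS, &pd, 1);

		memset(&load, 0, sizeof(load));
		load.load_pid = getpid();
		perfmonctl(fd, PFM_LOAD_CONTEXT, &load, 1);

		perfmonctl(fd, PFM_START, NULL, 0);
		/* ... workload being measured ... */
		perfmonctl(fd, PFM_STOP, NULL, 0);

		perfmonctl(fd, PFM_READ_PMDS, &pd, 1);
		printf("pmd4=%llu\n", (unsigned long long)pd.reg_value);
		return 0;
	}
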
5382 + * Called from pfm_read() for a perfmon v2.0 context.
5384 + * compatibility mode pfm_read() routine. We need a separate
5385 + * routine because the definition of the message has changed.
5386 + * The pfm_msg and pfarg_msg structures are different.
5388 + * return: sizeof(pfm_msg_t) on success, -errno otherwise
5390 +ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
5395 + union pfarg_msg msg_buf;
5396 + pfm_msg_t old_msg_buf;
5397 + pfm_ovfl_msg_t *o_msg;
5398 + struct pfarg_ovfl_msg *n_msg;
5401 + PFM_DBG("msg=%p size=%zu", buf, size);
5404 + * cannot extract partial messages.
5405 + * check even when there is no message
5407 + * cannot extract more than one message per call. Bytes
5408 + * above sizeof(msg) are ignored.
5410 + if (size < sizeof(old_msg_buf)) {
5411 + PFM_DBG("message is too small size=%zu must be >=%zu)",
5413 + sizeof(old_msg_buf));
5417 + ret = __pfm_read(ctx, &msg_buf, non_block);
5422 + * force return value to old message size
5424 + ret = sizeof(old_msg_buf);
5426 + o_msg = &old_msg_buf.pfm_ovfl_msg;
5427 + n_msg = &msg_buf.pfm_ovfl_msg;
5429 + switch (msg_buf.type) {
5430 + case PFM_MSG_OVFL:
5431 + o_msg->msg_type = PFM_MSG_OVFL;
5432 + o_msg->msg_ctx_fd = 0;
5433 + o_msg->msg_active_set = n_msg->msg_active_set;
5434 + o_msg->msg_tstamp = 0;
5436 + o_msg->msg_ovfl_pmds[0] = n_msg->msg_ovfl_pmds[0];
5437 + o_msg->msg_ovfl_pmds[1] = n_msg->msg_ovfl_pmds[1];
5438 + o_msg->msg_ovfl_pmds[2] = n_msg->msg_ovfl_pmds[2];
5439 + o_msg->msg_ovfl_pmds[3] = n_msg->msg_ovfl_pmds[3];
5442 + o_msg->msg_type = PFM_MSG_END;
5443 + o_msg->msg_ctx_fd = 0;
5444 + o_msg->msg_tstamp = 0;
5447 + PFM_DBG("unknown msg type=%d", msg_buf.type);
5449 + if (copy_to_user(buf, &old_msg_buf, sizeof(old_msg_buf)))
5451 + PFM_DBG_ovfl("ret=%d", ret);
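As a reading aid, what the conversion above preserves:

	/*
	 * old pfm_msg_t field        value written by this compat routine
	 * ------------------------------------------------------------------
	 * msg_type                   PFM_MSG_OVFL or PFM_MSG_END
	 * msg_active_set             copied from the new pfarg_msg
	 * msg_ovfl_pmds[0..3]        copied from the new pfarg_msg
	 * msg_ctx_fd, msg_tstamp     always 0
	 */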
5456 + * legacy /proc/perfmon simplified interface (we only maintain the
5457 + * global information; per-cpu stats are available via
5458 + * /sys/devices/system/cpu/cpuXX/perfmon
5460 +static struct proc_dir_entry *perfmon_proc;
5462 +static void *pfm_proc_start(struct seq_file *m, loff_t *pos)
5470 +static void *pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
5473 + return pfm_proc_start(m, pos);
5476 +static void pfm_proc_stop(struct seq_file *m, void *v)
5481 + * this is a simplified version of the legacy /proc/perfmon.
5482 + * We have retained ONLY the key information that tools are actually
5485 +static void pfm_proc_show_header(struct seq_file *m)
5489 + pfm_sysfs_res_show(buf, sizeof(buf), 3);
5491 + seq_printf(m, "perfmon version : %u.%u\n",
5492 + PFM_VERSION_MAJ, PFM_VERSION_MIN);
5494 + seq_printf(m, "model : %s", buf);
5497 +static int pfm_proc_show(struct seq_file *m, void *v)
5499 + pfm_proc_show_header(m);
5503 +struct seq_operations pfm_proc_seq_ops = {
5504 + .start = pfm_proc_start,
5505 + .next = pfm_proc_next,
5506 + .stop = pfm_proc_stop,
5507 + .show = pfm_proc_show
5510 +static int pfm_proc_open(struct inode *inode, struct file *file)
5512 + return seq_open(file, &pfm_proc_seq_ops);
5516 +static struct file_operations pfm_proc_fops = {
5517 + .open = pfm_proc_open,
5519 + .llseek = seq_lseek,
5520 + .release = seq_release,
5524 + * called from pfm_arch_init(), global initialization, called once
5526 +int __init pfm_ia64_compat_init(void)
5529 + * create /proc/perfmon
5531 + perfmon_proc = create_proc_entry("perfmon", S_IRUGO, NULL);
5532 + if (perfmon_proc == NULL) {
5533 + PFM_ERR("cannot create /proc entry, perfmon disabled");
5536 + perfmon_proc->proc_fops = &pfm_proc_fops;
5539 diff --git a/arch/ia64/perfmon/perfmon_default_smpl.c b/arch/ia64/perfmon/perfmon_default_smpl.c
5540 new file mode 100644
5541 index 0000000..b408a13
5543 +++ b/arch/ia64/perfmon/perfmon_default_smpl.c
5546 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
5547 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
5549 + * This file implements the old default sampling buffer format
5550 + * for the Linux/ia64 perfmon-2 subsystem. This is for backward
5551 + * compatibility only. Use the new default format in perfmon/
5553 + * This program is free software; you can redistribute it and/or
5554 + * modify it under the terms of version 2 of the GNU General Public
5555 + * License as published by the Free Software Foundation.
5557 + * This program is distributed in the hope that it will be useful,
5558 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5559 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5560 + * General Public License for more details.
5562 + * You should have received a copy of the GNU General Public License
5563 + * along with this program; if not, write to the Free Software
5564 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
5567 +#include <linux/kernel.h>
5568 +#include <linux/types.h>
5569 +#include <linux/module.h>
5570 +#include <linux/init.h>
5571 +#include <linux/delay.h>
5572 +#include <linux/smp.h>
5573 +#include <linux/sysctl.h>
5576 +#define FMT_FLAGS 0
5578 +#define FMT_FLAGS PFM_FMTFL_IS_BUILTIN
5581 +#include <linux/perfmon_kern.h>
5582 +#include <asm/perfmon_default_smpl.h>
5584 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
5585 +MODULE_DESCRIPTION("perfmon old default sampling format");
5586 +MODULE_LICENSE("GPL");
5588 +static int pfm_default_fmt_validate(u32 flags, u16 npmds, void *data)
5590 + struct pfm_default_smpl_arg *arg = data;
5591 + size_t min_buf_size;
5593 + if (data == NULL) {
5594 + PFM_DBG("no argument passed");
5599 + * compute min buf size. All PMDs are manipulated as 64-bit entities
5601 + min_buf_size = sizeof(struct pfm_default_smpl_hdr)
5602 + + (sizeof(struct pfm_default_smpl_entry) + (npmds*sizeof(u64)));
5604 + PFM_DBG("validate flags=0x%x npmds=%u min_buf_size=%lu "
5605 + "buf_size=%lu CPU%d", flags, npmds, min_buf_size,
5606 + arg->buf_size, smp_processor_id());
5609 + * must hold at least the buffer header + one minimally sized entry
5611 + if (arg->buf_size < min_buf_size)
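A hedged user-side sketch (not part of the patch) of the sizing that satisfies the check above, for a buffer meant to hold nsamples entries; the structure definitions are assumed to come from the exported default-format header:

	#include <stddef.h>
	#include <stdint.h>

	size_t old_default_buf_size(size_t nsamples, size_t npmds)
	{
		/* header + per-entry fixed part + npmds 64-bit PMD values, as above */
		return sizeof(struct pfm_default_smpl_hdr)
		     + nsamples * (sizeof(struct pfm_default_smpl_entry)
		                   + npmds * sizeof(uint64_t));
	}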
5617 +static int pfm_default_fmt_get_size(unsigned int flags, void *data,
5620 + struct pfm_default_smpl_arg *arg = data;
5623 + * size has been validated in default_validate
5625 + *size = arg->buf_size;
5630 +static int pfm_default_fmt_init(struct pfm_context *ctx, void *buf,
5631 + u32 flags, u16 npmds, void *data)
5633 + struct pfm_default_smpl_hdr *hdr;
5634 + struct pfm_default_smpl_arg *arg = data;
5638 + hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
5639 + hdr->hdr_buf_size = arg->buf_size;
5640 + hdr->hdr_cur_offs = sizeof(*hdr);
5641 + hdr->hdr_overflows = 0;
5642 + hdr->hdr_count = 0;
5644 + PFM_DBG("buffer=%p buf_size=%lu hdr_size=%lu "
5645 + "hdr_version=%u cur_offs=%lu",
5647 + hdr->hdr_buf_size,
5650 + hdr->hdr_cur_offs);
5655 +static int pfm_default_fmt_handler(struct pfm_context *ctx,
5656 + unsigned long ip, u64 tstamp, void *data)
5658 + struct pfm_default_smpl_hdr *hdr;
5659 + struct pfm_default_smpl_entry *ent;
5660 + void *cur, *last, *buf;
5662 + size_t entry_size;
5663 + u16 npmds, i, ovfl_pmd;
5664 + struct pfm_ovfl_arg *arg;
5666 + hdr = ctx->smpl_addr;
5667 + arg = &ctx->ovfl_arg;
5670 + cur = buf+hdr->hdr_cur_offs;
5671 + last = buf+hdr->hdr_buf_size;
5672 + ovfl_pmd = arg->ovfl_pmd;
5675 + * precheck for sanity
5677 + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE)
5680 + npmds = arg->num_smpl_pmds;
5684 + prefetch(arg->smpl_pmds_values);
5686 + entry_size = sizeof(*ent) + (npmds << 3);
5688 + /* position for first pmd */
5689 + e = (unsigned long *)(ent+1);
5693 + PFM_DBG_ovfl("count=%lu cur=%p last=%p free_bytes=%lu "
5694 + "ovfl_pmd=%d npmds=%u",
5702 + * current = task running at the time of the overflow.
5705 + * - this is usually the task being monitored.
5706 + * Under certain conditions, it might be a different task
5709 + * - this is not necessarily the task controlling the session
5711 + ent->pid = current->pid;
5712 + ent->ovfl_pmd = ovfl_pmd;
5713 + ent->last_reset_val = arg->pmd_last_reset;
5716 + * where did the fault happen (includes slot number)
5720 + ent->tstamp = tstamp;
5721 + ent->cpu = smp_processor_id();
5722 + ent->set = arg->active_set;
5723 + ent->tgid = current->tgid;
5726 + * selectively store PMDs in increasing index number
5729 + u64 *val = arg->smpl_pmds_values;
5730 + for (i = 0; i < npmds; i++)
5735 + * update position for next entry
5737 + hdr->hdr_cur_offs += entry_size;
5738 + cur += entry_size;
5741 + * post check to avoid losing the last sample
5743 + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE)
5747 + * reset before returning from interrupt handler
5749 + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
5752 + PFM_DBG_ovfl("smpl buffer full free=%lu, count=%lu",
5753 + last-cur, hdr->hdr_count);
5756 + * increment the number of buffer overflows;
5757 + * important to detect duplicate sets of samples.
5759 + hdr->hdr_overflows++;
5762 + * request notification and masking of monitoring.
5763 + * Notification is still subject to the overflowed
5765 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
5767 + return -ENOBUFS; /* we are full, sorry */
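As a reading aid, the buffer layout maintained by the handler above:

	/*
	 * +--------------------------------+  <- ctx->smpl_addr
	 * | struct pfm_default_smpl_hdr    |
	 * +--------------------------------+  <- hdr_cur_offs (starts at sizeof(hdr))
	 * | struct pfm_default_smpl_entry  |  one entry per overflow, followed by
	 * | npmds x u64 sampled PMD values |  the selectively recorded PMDs
	 * +--------------------------------+
	 * | ...                            |
	 * +--------------------------------+  <- hdr_buf_size
	 *
	 * hdr_cur_offs advances by sizeof(entry) + 8*npmds per sample; once fewer
	 * than PFM_DEFAULT_MAX_ENTRY_SIZE bytes remain, the handler requests
	 * notification and masking and returns -ENOBUFS.
	 */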
5770 +static int pfm_default_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
5772 + struct pfm_default_smpl_hdr *hdr;
5776 + hdr->hdr_count = 0;
5777 + hdr->hdr_cur_offs = sizeof(*hdr);
5779 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
5784 +static int pfm_default_fmt_exit(void *buf)
5789 +static struct pfm_smpl_fmt default_fmt = {
5790 + .fmt_name = "default-old",
5791 + .fmt_version = 0x10000,
5792 + .fmt_arg_size = sizeof(struct pfm_default_smpl_arg),
5793 + .fmt_validate = pfm_default_fmt_validate,
5794 + .fmt_getsize = pfm_default_fmt_get_size,
5795 + .fmt_init = pfm_default_fmt_init,
5796 + .fmt_handler = pfm_default_fmt_handler,
5797 + .fmt_restart = pfm_default_fmt_restart,
5798 + .fmt_exit = pfm_default_fmt_exit,
5799 + .fmt_flags = FMT_FLAGS,
5800 + .owner = THIS_MODULE
5803 +static int pfm_default_fmt_init_module(void)
5807 + return pfm_fmt_register(&default_fmt);
5811 +static void pfm_default_fmt_cleanup_module(void)
5813 + pfm_fmt_unregister(&default_fmt);
5816 +module_init(pfm_default_fmt_init_module);
5817 +module_exit(pfm_default_fmt_cleanup_module);
5818 diff --git a/arch/ia64/perfmon/perfmon_generic.c b/arch/ia64/perfmon/perfmon_generic.c
5819 new file mode 100644
5820 index 0000000..47b1870
5822 +++ b/arch/ia64/perfmon/perfmon_generic.c
5825 + * This file contains the generic PMU register description tables
5826 + * and pmc checker used by perfmon.c.
5828 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
5829 + * contributed by Stephane Eranian <eranian@hpl.hp.com>
5831 + * This program is free software; you can redistribute it and/or
5832 + * modify it under the terms of version 2 of the GNU General Public
5833 + * License as published by the Free Software Foundation.
5835 + * This program is distributed in the hope that it will be useful,
5836 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5837 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5838 + * General Public License for more details.
5840 + * You should have received a copy of the GNU General Public License
5841 + * along with this program; if not, write to the Free Software
5842 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
5845 +#include <linux/module.h>
5846 +#include <linux/perfmon_kern.h>
5847 +#include <asm/pal.h>
5849 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
5850 +MODULE_DESCRIPTION("Generic IA-64 PMU description tables");
5851 +MODULE_LICENSE("GPL");
5853 +#define RDEP(x) (1UL << (x))
5855 +#define PFM_IA64GEN_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7))
5856 +#define PFM_IA64GEN_RSVD (0xffffffffffff0080UL)
5857 +#define PFM_IA64GEN_NO64 (1UL<<5)
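Expanding the constants above, as a reading aid:

	/*
	 * PFM_IA64GEN_MASK_PMCS = RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7) = 0x00f0,
	 * i.e. the generic counting registers PMC4-PMC7.
	 * PFM_IA64GEN_NO64 is bit 5, the pmc.oi bit that
	 * pfm_ia64gen_pmc_check() below sets for 64-bit counter emulation.
	 */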
5859 +/* forward declaration */
5860 +static struct pfm_pmu_config pfm_ia64gen_pmu_conf;
5862 +static struct pfm_arch_pmu_info pfm_ia64gen_pmu_info = {
5863 + .mask_pmcs = {PFM_IA64GEN_MASK_PMCS,},
5866 +static struct pfm_regmap_desc pfm_ia64gen_pmc_desc[] = {
5871 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 4),
5872 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 5),
5873 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 6),
5874 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 7)
5876 +#define PFM_IA64GEN_NUM_PMCS ARRAY_SIZE(pfm_ia64gen_pmc_desc)
5878 +static struct pfm_regmap_desc pfm_ia64gen_pmd_desc[] = {
5883 +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
5884 +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
5885 +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
5886 +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7)
5888 +#define PFM_IA64GEN_NUM_PMDS ARRAY_SIZE(pfm_ia64gen_pmd_desc)
5890 +static int pfm_ia64gen_pmc_check(struct pfm_context *ctx,
5891 + struct pfm_event_set *set,
5892 + struct pfarg_pmc *req)
5894 +#define PFM_IA64GEN_PMC_PM_POS6 (1UL<<6)
5898 + is_system = ctx->flags.system;
5899 + tmpval = req->reg_value;
5901 + switch (req->reg_num) {
5906 + /* set pmc.oi for 64-bit emulation */
5907 + tmpval |= 1UL << 5;
5910 + tmpval |= PFM_IA64GEN_PMC_PM_POS6;
5912 + tmpval &= ~PFM_IA64GEN_PMC_PM_POS6;
5916 + req->reg_value = tmpval;
5922 + * matches anything
5924 +static int pfm_ia64gen_probe_pmu(void)
5926 + u64 pm_buffer[16];
5927 + pal_perf_mon_info_u_t pm_info;
5930 + * call PAL_PERFMON_INFO to retrieve counter width which
5931 + * is implementation specific
5933 + if (ia64_pal_perf_mon_info(pm_buffer, &pm_info))
5936 + pfm_ia64gen_pmu_conf.counter_width = pm_info.pal_perf_mon_info_s.width;
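A note on the value retrieved here, as a reading aid:

	/*
	 * PAL_PERFMON_INFO reports the implemented PMD counter width. The generic
	 * module reads it at probe time, whereas the model-specific modules later
	 * in this patch hard-code it (32 bits for the original Itanium, 47 bits
	 * for Itanium 2 and Montecito), so both approaches agree on real parts.
	 */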
5942 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
5944 +static struct pfm_pmu_config pfm_ia64gen_pmu_conf = {
5945 + .pmu_name = "Generic IA-64",
5946 + .counter_width = 0, /* computed from PAL_PERFMON_INFO */
5947 + .pmd_desc = pfm_ia64gen_pmd_desc,
5948 + .pmc_desc = pfm_ia64gen_pmc_desc,
5949 + .probe_pmu = pfm_ia64gen_probe_pmu,
5950 + .num_pmc_entries = PFM_IA64GEN_NUM_PMCS,
5951 + .num_pmd_entries = PFM_IA64GEN_NUM_PMDS,
5952 + .pmc_write_check = pfm_ia64gen_pmc_check,
5954 + .flags = PFM_PMU_BUILTIN_FLAG,
5955 + .owner = THIS_MODULE,
5956 + .pmu_info = &pfm_ia64gen_pmu_info
5957 + /* no read/write checkers */
5960 +static int __init pfm_gen_pmu_init_module(void)
5962 + return pfm_pmu_register(&pfm_ia64gen_pmu_conf);
5965 +static void __exit pfm_gen_pmu_cleanup_module(void)
5967 + pfm_pmu_unregister(&pfm_ia64gen_pmu_conf);
5970 +module_init(pfm_gen_pmu_init_module);
5971 +module_exit(pfm_gen_pmu_cleanup_module);
5972 diff --git a/arch/ia64/perfmon/perfmon_itanium.c b/arch/ia64/perfmon/perfmon_itanium.c
5973 new file mode 100644
5974 index 0000000..094b31b
5976 +++ b/arch/ia64/perfmon/perfmon_itanium.c
5979 + * This file contains the Itanium PMU register description tables
5980 + * and pmc checker used by perfmon.c.
5982 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
5983 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
5985 + * This program is free software; you can redistribute it and/or
5986 + * modify it under the terms of version 2 of the GNU General Public
5987 + * License as published by the Free Software Foundation.
5989 + * This program is distributed in the hope that it will be useful,
5990 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5991 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5992 + * General Public License for more details.
5994 + * You should have received a copy of the GNU General Public License
5995 + * along with this program; if not, write to the Free Software
5996 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
5999 +#include <linux/module.h>
6000 +#include <linux/perfmon_kern.h>
6002 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
6003 +MODULE_DESCRIPTION("Itanium (Merced) PMU description tables");
6004 +MODULE_LICENSE("GPL");
6006 +#define RDEP(x) (1ULL << (x))
6008 +#define PFM_ITA_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\
6011 +#define PFM_ITA_NO64 (1ULL<<5)
6013 +static struct pfm_arch_pmu_info pfm_ita_pmu_info = {
6014 + .mask_pmcs = {PFM_ITA_MASK_PMCS,},
6016 +/* reserved bits are 1 in the mask */
6017 +#define PFM_ITA_RSVD 0xfffffffffc8000a0UL
6019 + * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
6020 + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
6021 + * but this is fine because they are handled separately in the IA-64 specific
6024 +static struct pfm_regmap_desc pfm_ita_pmc_desc[] = {
6029 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 4),
6030 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 5),
6031 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 6),
6032 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 7),
6033 +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 8),
6034 +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 9),
6035 +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xfffffffff3f0ff30UL, 0, 10),
6036 +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x10000000UL, 0xffffffffecf0ff30UL, 0, 11),
6037 +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0030UL, 0, 12),
6038 +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x3ffff00000001UL, 0xfffffffffffffffeUL, 0, 13),
6039 +/* pmc14 */ PMX_NA,
6040 +/* pmc15 */ PMX_NA,
6041 +/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6042 +/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6043 +/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6044 +/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6045 +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6046 +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6047 +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6048 +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6049 +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6050 +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6051 +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6052 +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6053 +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6054 +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6055 +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6056 +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6057 +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6058 +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6059 +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6060 +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6061 +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6062 +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6063 +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6064 +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6065 +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6066 +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6067 +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6068 +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6069 +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6070 +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6071 +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0),
6072 +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
6073 +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2),
6074 +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
6075 +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4),
6076 +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
6077 +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6),
6078 +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
6079 +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0),
6080 +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
6081 +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2),
6082 +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
6083 +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4),
6084 +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
6085 +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6),
6086 +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
6088 +#define PFM_ITA_NUM_PMCS ARRAY_SIZE(pfm_ita_pmc_desc)
6090 +static struct pfm_regmap_desc pfm_ita_pmd_desc[] = {
6091 +/* pmd0 */ PMD_DP(PFM_REG_I , "PMD0", 0, 1ull << 10),
6092 +/* pmd1 */ PMD_DP(PFM_REG_I , "PMD1", 1, 1ull << 10),
6093 +/* pmd2 */ PMD_DP(PFM_REG_I , "PMD2", 2, 1ull << 11),
6094 +/* pmd3 */ PMD_DP(PFM_REG_I , "PMD3", 3, 1ull << 11),
6095 +/* pmd4 */ PMD_DP(PFM_REG_C , "PMD4", 4, 1ull << 4),
6096 +/* pmd5 */ PMD_DP(PFM_REG_C , "PMD5", 5, 1ull << 5),
6097 +/* pmd6 */ PMD_DP(PFM_REG_C , "PMD6", 6, 1ull << 6),
6098 +/* pmd7 */ PMD_DP(PFM_REG_C , "PMD7", 7, 1ull << 7),
6099 +/* pmd8 */ PMD_DP(PFM_REG_I , "PMD8", 8, 1ull << 12),
6100 +/* pmd9 */ PMD_DP(PFM_REG_I , "PMD9", 9, 1ull << 12),
6101 +/* pmd10 */ PMD_DP(PFM_REG_I , "PMD10", 10, 1ull << 12),
6102 +/* pmd11 */ PMD_DP(PFM_REG_I , "PMD11", 11, 1ull << 12),
6103 +/* pmd12 */ PMD_DP(PFM_REG_I , "PMD12", 12, 1ull << 12),
6104 +/* pmd13 */ PMD_DP(PFM_REG_I , "PMD13", 13, 1ull << 12),
6105 +/* pmd14 */ PMD_DP(PFM_REG_I , "PMD14", 14, 1ull << 12),
6106 +/* pmd15 */ PMD_DP(PFM_REG_I , "PMD15", 15, 1ull << 12),
6107 +/* pmd16 */ PMD_DP(PFM_REG_I , "PMD16", 16, 1ull << 12),
6108 +/* pmd17 */ PMD_DP(PFM_REG_I , "PMD17", 17, 1ull << 11)
6110 +#define PFM_ITA_NUM_PMDS ARRAY_SIZE(pfm_ita_pmd_desc)
6112 +static int pfm_ita_pmc_check(struct pfm_context *ctx,
6113 + struct pfm_event_set *set,
6114 + struct pfarg_pmc *req)
6116 +#define PFM_ITA_PMC_PM_POS6 (1UL<<6)
6117 + struct pfm_arch_context *ctx_arch;
6120 + int ret = 0, is_system;
6122 + tmpval = req->reg_value;
6123 + cnum = req->reg_num;
6124 + ctx_arch = pfm_ctx_arch(ctx);
6125 + is_system = ctx->flags.system;
6136 + tmpval |= PFM_ITA_PMC_PM_POS6;
6138 + tmpval &= ~PFM_ITA_PMC_PM_POS6;
6143 + * we must clear the (instruction) debug registers if pmc13.ta bit is
6144 + * cleared before they are written (fl_using_dbreg==0) to avoid
6145 + * picking up stale information.
6147 + if (cnum == 13 && ((tmpval & 0x1) == 0)
6148 + && ctx_arch->flags.use_dbr == 0) {
6149 + PFM_DBG("pmc13 has pmc13.ta cleared, clearing ibr");
6150 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6156 + * we must clear the (data) debug registers if pmc11.pt bit is cleared
6157 + * before they are written (fl_using_dbreg==0) to avoid picking up
6158 + * stale information.
6160 + if (cnum == 11 && ((tmpval >> 28) & 0x1) == 0
6161 + && ctx_arch->flags.use_dbr == 0) {
6162 + PFM_DBG("pmc11 has pmc11.pt cleared, clearing dbr");
6163 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6168 + req->reg_value = tmpval;
6173 +static int pfm_ita_probe_pmu(void)
6175 + return local_cpu_data->family == 0x7 && !ia64_platform_is("hpsim")
6180 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
6182 +static struct pfm_pmu_config pfm_ita_pmu_conf = {
6183 + .pmu_name = "Itanium",
6184 + .counter_width = 32,
6185 + .pmd_desc = pfm_ita_pmd_desc,
6186 + .pmc_desc = pfm_ita_pmc_desc,
6187 + .pmc_write_check = pfm_ita_pmc_check,
6188 + .num_pmc_entries = PFM_ITA_NUM_PMCS,
6189 + .num_pmd_entries = PFM_ITA_NUM_PMDS,
6190 + .probe_pmu = pfm_ita_probe_pmu,
6192 + .flags = PFM_PMU_BUILTIN_FLAG,
6193 + .owner = THIS_MODULE,
6194 + .pmu_info = &pfm_ita_pmu_info
6197 +static int __init pfm_ita_pmu_init_module(void)
6199 + return pfm_pmu_register(&pfm_ita_pmu_conf);
6202 +static void __exit pfm_ita_pmu_cleanup_module(void)
6204 + pfm_pmu_unregister(&pfm_ita_pmu_conf);
6207 +module_init(pfm_ita_pmu_init_module);
6208 +module_exit(pfm_ita_pmu_cleanup_module);
6210 diff --git a/arch/ia64/perfmon/perfmon_mckinley.c b/arch/ia64/perfmon/perfmon_mckinley.c
6211 new file mode 100644
6212 index 0000000..dc59092
6214 +++ b/arch/ia64/perfmon/perfmon_mckinley.c
6217 + * This file contains the McKinley PMU register description tables
6218 + * and pmc checker used by perfmon.c.
6220 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
6221 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
6223 + * This program is free software; you can redistribute it and/or
6224 + * modify it under the terms of version 2 of the GNU General Public
6225 + * License as published by the Free Software Foundation.
6227 + * This program is distributed in the hope that it will be useful,
6228 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6229 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6230 + * General Public License for more details.
6232 + * You should have received a copy of the GNU General Public License
6233 + * along with this program; if not, write to the Free Software
6234 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
6237 +#include <linux/module.h>
6238 +#include <linux/perfmon_kern.h>
6240 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
6241 +MODULE_DESCRIPTION("Itanium 2 (McKinley) PMU description tables");
6242 +MODULE_LICENSE("GPL");
6244 +#define RDEP(x) (1UL << (x))
6246 +#define PFM_MCK_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\
6249 +#define PFM_MCK_NO64 (1UL<<5)
6251 +static struct pfm_arch_pmu_info pfm_mck_pmu_info = {
6252 + .mask_pmcs = {PFM_MCK_MASK_PMCS,},
6255 +/* reserved bits are 1 in the mask */
6256 +#define PFM_ITA2_RSVD 0xfffffffffc8000a0UL
6259 + * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
6260 + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
6261 + * but this is fine because they are handled separately in the IA-64 specific
6264 +static struct pfm_regmap_desc pfm_mck_pmc_desc[] = {
6269 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x800020UL, 0xfffffffffc8000a0, PFM_MCK_NO64, 4),
6270 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 5),
6271 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 6),
6272 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 7),
6273 +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xffffffff3fffffffUL, 0xc0000004UL, 0, 8),
6274 +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xffffffff3ffffffcUL, 0xc0000004UL, 0, 9),
6275 +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xffffffffffff0000UL, 0, 10),
6276 +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x0, 0xfffffffffcf0fe30UL, 0, 11),
6277 +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0000UL, 0, 12),
6278 +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x2078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 13),
6279 +/* pmc14 */ PMC_D(PFM_REG_W , "PMC14", 0x0db60db60db60db6UL, 0xffffffffffffdb6dUL, 0, 14),
6280 +/* pmc15 */ PMC_D(PFM_REG_W , "PMC15", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 15),
6281 +/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6282 +/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6283 +/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6284 +/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6285 +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6286 +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6287 +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6288 +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6289 +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6290 +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6291 +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6292 +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6293 +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6294 +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6295 +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6296 +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6297 +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6298 +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6299 +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6300 +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6301 +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6302 +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6303 +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6304 +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6305 +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6306 +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6307 +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6308 +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6309 +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6310 +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6311 +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0),
6312 +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
6313 +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2),
6314 +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
6315 +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4),
6316 +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
6317 +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6),
6318 +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
6319 +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0),
6320 +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
6321 +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2),
6322 +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
6323 +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4),
6324 +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
6325 +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6),
6326 +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
6328 +#define PFM_MCK_NUM_PMCS ARRAY_SIZE(pfm_mck_pmc_desc)
6330 +static struct pfm_regmap_desc pfm_mck_pmd_desc[] = {
6331 +/* pmd0 */ PMD_DP(PFM_REG_I, "PMD0", 0, 1ull << 10),
6332 +/* pmd1 */ PMD_DP(PFM_REG_I, "PMD1", 1, 1ull << 10),
6333 +/* pmd2 */ PMD_DP(PFM_REG_I, "PMD2", 2, 1ull << 11),
6334 +/* pmd3 */ PMD_DP(PFM_REG_I, "PMD3", 3, 1ull << 11),
6335 +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
6336 +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
6337 +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
6338 +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7),
6339 +/* pmd8 */ PMD_DP(PFM_REG_I, "PMD8", 8, 1ull << 12),
6340 +/* pmd9 */ PMD_DP(PFM_REG_I, "PMD9", 9, 1ull << 12),
6341 +/* pmd10 */ PMD_DP(PFM_REG_I, "PMD10", 10, 1ull << 12),
6342 +/* pmd11 */ PMD_DP(PFM_REG_I, "PMD11", 11, 1ull << 12),
6343 +/* pmd12 */ PMD_DP(PFM_REG_I, "PMD12", 12, 1ull << 12),
6344 +/* pmd13 */ PMD_DP(PFM_REG_I, "PMD13", 13, 1ull << 12),
6345 +/* pmd14 */ PMD_DP(PFM_REG_I, "PMD14", 14, 1ull << 12),
6346 +/* pmd15 */ PMD_DP(PFM_REG_I, "PMD15", 15, 1ull << 12),
6347 +/* pmd16 */ PMD_DP(PFM_REG_I, "PMD16", 16, 1ull << 12),
6348 +/* pmd17 */ PMD_DP(PFM_REG_I, "PMD17", 17, 1ull << 11)
6350 +#define PFM_MCK_NUM_PMDS ARRAY_SIZE(pfm_mck_pmd_desc)
6352 +static int pfm_mck_pmc_check(struct pfm_context *ctx,
6353 + struct pfm_event_set *set,
6354 + struct pfarg_pmc *req)
6356 + struct pfm_arch_context *ctx_arch;
6357 + u64 val8 = 0, val14 = 0, val13 = 0;
6360 + int ret = 0, check_case1 = 0;
6363 + tmpval = req->reg_value;
6364 + cnum = req->reg_num;
6365 + ctx_arch = pfm_ctx_arch(ctx);
6366 + is_system = ctx->flags.system;
6368 +#define PFM_MCK_PMC_PM_POS6 (1UL<<6)
6369 +#define PFM_MCK_PMC_PM_POS4 (1UL<<4)
6379 + tmpval |= PFM_MCK_PMC_PM_POS6;
6381 + tmpval &= ~PFM_MCK_PMC_PM_POS6;
6386 + val13 = set->pmcs[13];
6387 + val14 = set->pmcs[14];
6393 + tmpval |= PFM_MCK_PMC_PM_POS4;
6395 + tmpval &= ~PFM_MCK_PMC_PM_POS4;
6399 + val8 = set->pmcs[8];
6401 + val14 = set->pmcs[14];
6406 + val8 = set->pmcs[8];
6407 + val13 = set->pmcs[13];
6414 + * check for illegal configurations which can produce inconsistencies
6415 + * in tagging i-side events in L1D and L2 caches
6417 + if (check_case1) {
6418 + ret = (((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0))
6419 + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
6420 + || (((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
6423 + PFM_DBG("perfmon: invalid config pmc8=0x%lx "
6424 + "pmc13=0x%lx pmc14=0x%lx",
6425 + val8, val13, val14);
6431 + * check if the configuration implicitly activates the use of
6432 + * the debug registers. If so, we ensure that this is
6433 + * possible and that we do not pick up stale values in the HW
6436 + * We postpone the checks of pmc13 and pmc14 to avoid side effects
6437 + * in case of errors
6441 + * pmc13 is "active" if:
6442 + * one of the pmc13.cfg_dbrpXX fields is different from 0x3
6444 + * at the corresponding pmc13.ena_dbrpXX is set.
6446 + if (cnum == 13 && (tmpval & 0x1e00000000000UL)
6447 + && (tmpval & 0x18181818UL) != 0x18181818UL
6448 + && ctx_arch->flags.use_dbr == 0) {
6449 + PFM_DBG("pmc13=0x%lx active", tmpval);
6450 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6456 + * if any pmc14.ibrpX bit is enabled we must clear the ibrs
6458 + if (cnum == 14 && ((tmpval & 0x2222UL) != 0x2222UL)
6459 + && ctx_arch->flags.use_dbr == 0) {
6460 + PFM_DBG("pmc14=0x%lx active", tmpval);
6461 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6466 + req->reg_value = tmpval;
6471 +static int pfm_mck_probe_pmu(void)
6473 + return local_cpu_data->family == 0x1f ? 0 : -1;
6477 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
6479 +static struct pfm_pmu_config pfm_mck_pmu_conf = {
6480 + .pmu_name = "Itanium 2",
6481 + .counter_width = 47,
6482 + .pmd_desc = pfm_mck_pmd_desc,
6483 + .pmc_desc = pfm_mck_pmc_desc,
6484 + .pmc_write_check = pfm_mck_pmc_check,
6485 + .num_pmc_entries = PFM_MCK_NUM_PMCS,
6486 + .num_pmd_entries = PFM_MCK_NUM_PMDS,
6487 + .probe_pmu = pfm_mck_probe_pmu,
6489 + .flags = PFM_PMU_BUILTIN_FLAG,
6490 + .owner = THIS_MODULE,
6491 + .pmu_info = &pfm_mck_pmu_info,
6494 +static int __init pfm_mck_pmu_init_module(void)
6496 + return pfm_pmu_register(&pfm_mck_pmu_conf);
6499 +static void __exit pfm_mck_pmu_cleanup_module(void)
6501 + pfm_pmu_unregister(&pfm_mck_pmu_conf);
6504 +module_init(pfm_mck_pmu_init_module);
6505 +module_exit(pfm_mck_pmu_cleanup_module);
6506 diff --git a/arch/ia64/perfmon/perfmon_montecito.c b/arch/ia64/perfmon/perfmon_montecito.c
6507 new file mode 100644
6508 index 0000000..3f76f73
6510 +++ b/arch/ia64/perfmon/perfmon_montecito.c
6513 + * This file contains the Montecito PMU register description tables
6514 + * and pmc checker used by perfmon.c.
6516 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
6517 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
6519 + * This program is free software; you can redistribute it and/or
6520 + * modify it under the terms of version 2 of the GNU General Public
6521 + * License as published by the Free Software Foundation.
6523 + * This program is distributed in the hope that it will be useful,
6524 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6525 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6526 + * General Public License for more details.
6528 + * You should have received a copy of the GNU General Public License
6529 + * along with this program; if not, write to the Free Software
6530 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
6533 +#include <linux/module.h>
6534 +#include <linux/smp.h>
6535 +#include <linux/perfmon_kern.h>
6537 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
6538 +MODULE_DESCRIPTION("Dual-Core Itanium 2 (Montecito) PMU description table");
6539 +MODULE_LICENSE("GPL");
6541 +#define RDEP(x) (1UL << (x))
6543 +#define PFM_MONT_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|\
6544 + RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|\
6545 + RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|\
6546 + RDEP(37)|RDEP(39)|RDEP(40)|RDEP(42))
6548 +#define PFM_MONT_NO64 (1UL<<5)
6550 +static struct pfm_arch_pmu_info pfm_mont_pmu_info = {
6551 + .mask_pmcs = {PFM_MONT_MASK_PMCS,},
6554 +#define PFM_MONT_RSVD 0xffffffff838000a0UL
6557 + * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
6558 + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
6559 + * but this is fine because they are handled separately in the IA-64 specific
6562 + * For PMC4-PMC15, PMC40: we force pmc.ism=2 (IA-64 mode only)
6564 +static struct pfm_regmap_desc pfm_mont_pmc_desc[] = {
6569 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 4),
6570 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 5),
6571 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 6),
6572 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 7),
6573 +/* pmc8 */ PMC_D(PFM_REG_W64, "PMC8" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 8),
6574 +/* pmc9 */ PMC_D(PFM_REG_W64, "PMC9" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 9),
6575 +/* pmc10 */ PMC_D(PFM_REG_W64, "PMC10", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 10),
6576 +/* pmc11 */ PMC_D(PFM_REG_W64, "PMC11", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 11),
6577 +/* pmc12 */ PMC_D(PFM_REG_W64, "PMC12", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 12),
6578 +/* pmc13 */ PMC_D(PFM_REG_W64, "PMC13", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 13),
6579 +/* pmc14 */ PMC_D(PFM_REG_W64, "PMC14", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 14),
6580 +/* pmc15 */ PMC_D(PFM_REG_W64, "PMC15", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 15),
6581 +/* pmc16 */ PMX_NA,
6582 +/* pmc17 */ PMX_NA,
6583 +/* pmc18 */ PMX_NA,
6584 +/* pmc19 */ PMX_NA,
6585 +/* pmc20 */ PMX_NA,
6586 +/* pmc21 */ PMX_NA,
6587 +/* pmc22 */ PMX_NA,
6588 +/* pmc23 */ PMX_NA,
6589 +/* pmc24 */ PMX_NA,
6590 +/* pmc25 */ PMX_NA,
6591 +/* pmc26 */ PMX_NA,
6592 +/* pmc27 */ PMX_NA,
6593 +/* pmc28 */ PMX_NA,
6594 +/* pmc29 */ PMX_NA,
6595 +/* pmc30 */ PMX_NA,
6596 +/* pmc31 */ PMX_NA,
6597 +/* pmc32 */ PMC_D(PFM_REG_W , "PMC32", 0x30f01ffffffffffUL, 0xfcf0fe0000000000UL, 0, 32),
6598 +/* pmc33 */ PMC_D(PFM_REG_W , "PMC33", 0x0, 0xfffffe0000000000UL, 0, 33),
6599 +/* pmc34 */ PMC_D(PFM_REG_W , "PMC34", 0xf01ffffffffffUL, 0xfff0fe0000000000UL, 0, 34),
6600 +/* pmc35 */ PMC_D(PFM_REG_W , "PMC35", 0x0, 0x1ffffffffffUL, 0, 35),
6601 +/* pmc36 */ PMC_D(PFM_REG_W , "PMC36", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 36),
6602 +/* pmc37 */ PMC_D(PFM_REG_W , "PMC37", 0x0, 0xffffffffffffc000UL, 0, 37),
6603 +/* pmc38 */ PMC_D(PFM_REG_W , "PMC38", 0xdb6UL, 0xffffffffffffdb6dUL, 0, 38),
6604 +/* pmc39 */ PMC_D(PFM_REG_W , "PMC39", 0x0, 0xffffffffffff0030UL, 0, 39),
6605 +/* pmc40 */ PMC_D(PFM_REG_W , "PMC40", 0x2000000UL, 0xfffffffffff0fe30UL, 0, 40),
6606 +/* pmc41 */ PMC_D(PFM_REG_W , "PMC41", 0x00002078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 41),
6607 +/* pmc42 */ PMC_D(PFM_REG_W , "PMC42", 0x0, 0xfff800b0UL, 0, 42),
6608 +/* pmc43 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6609 +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6610 +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6611 +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6612 +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6613 +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6614 +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6615 +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6616 +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6617 +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6618 +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6619 +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6620 +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6621 +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6622 +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6623 +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6624 +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6625 +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6626 +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6627 +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6628 +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6629 +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6630 +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6631 +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6632 +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6633 +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6634 +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6635 +/* pmc256 */ PMC_D(PFM_REG_W, "IBR0", 0x0, 0, 0, 0),
6636 +/* pmc257 */ PMC_D(PFM_REG_W, "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
6637 +/* pmc258 */ PMC_D(PFM_REG_W, "IBR2", 0x0, 0, 0, 2),
6638 +/* pmc259 */ PMC_D(PFM_REG_W, "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
6639 +/* pmc260 */ PMC_D(PFM_REG_W, "IBR4", 0x0, 0, 0, 4),
6640 +/* pmc261 */ PMC_D(PFM_REG_W, "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
6641 +/* pmc262 */ PMC_D(PFM_REG_W, "IBR6", 0x0, 0, 0, 6),
6642 +/* pmc263 */ PMC_D(PFM_REG_W, "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
6643 +/* pmc264 */ PMC_D(PFM_REG_W, "DBR0", 0x0, 0, 0, 0),
6644 +/* pmc265 */ PMC_D(PFM_REG_W, "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
6645 +/* pmc266 */ PMC_D(PFM_REG_W, "DBR2", 0x0, 0, 0, 2),
6646 +/* pmc267 */ PMC_D(PFM_REG_W, "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
6647 +/* pmc268 */ PMC_D(PFM_REG_W, "DBR4", 0x0, 0, 0, 4),
6648 +/* pmc269 */ PMC_D(PFM_REG_W, "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
6649 +/* pmc270 */ PMC_D(PFM_REG_W, "DBR6", 0x0, 0, 0, 6),
6650 +/* pmc271 */ PMC_D(PFM_REG_W, "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
6652 +#define PFM_MONT_NUM_PMCS ARRAY_SIZE(pfm_mont_pmc_desc)
6654 +static struct pfm_regmap_desc pfm_mont_pmd_desc[] = {
6659 +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
6660 +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
6661 +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
6662 +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7),
6663 +/* pmd8 */ PMD_DP(PFM_REG_C, "PMD8", 8, 1ull << 8),
6664 +/* pmd9 */ PMD_DP(PFM_REG_C, "PMD9", 9, 1ull << 9),
6665 +/* pmd10 */ PMD_DP(PFM_REG_C, "PMD10", 10, 1ull << 10),
6666 +/* pmd11 */ PMD_DP(PFM_REG_C, "PMD11", 11, 1ull << 11),
6667 +/* pmd12 */ PMD_DP(PFM_REG_C, "PMD12", 12, 1ull << 12),
6668 +/* pmd13 */ PMD_DP(PFM_REG_C, "PMD13", 13, 1ull << 13),
6669 +/* pmd14 */ PMD_DP(PFM_REG_C, "PMD14", 14, 1ull << 14),
6670 +/* pmd15 */ PMD_DP(PFM_REG_C, "PMD15", 15, 1ull << 15),
6671 +/* pmd16 */ PMX_NA,
6672 +/* pmd17 */ PMX_NA,
6673 +/* pmd18 */ PMX_NA,
6674 +/* pmd19 */ PMX_NA,
6675 +/* pmd20 */ PMX_NA,
6676 +/* pmd21 */ PMX_NA,
6677 +/* pmd22 */ PMX_NA,
6678 +/* pmd23 */ PMX_NA,
6679 +/* pmd24 */ PMX_NA,
6680 +/* pmd25 */ PMX_NA,
6681 +/* pmd26 */ PMX_NA,
6682 +/* pmd27 */ PMX_NA,
6683 +/* pmd28 */ PMX_NA,
6684 +/* pmd29 */ PMX_NA,
6685 +/* pmd30 */ PMX_NA,
6686 +/* pmd31 */ PMX_NA,
6687 +/* pmd32 */ PMD_DP(PFM_REG_I, "PMD32", 32, 1ull << 40),
6688 +/* pmd33 */ PMD_DP(PFM_REG_I, "PMD33", 33, 1ull << 40),
6689 +/* pmd34 */ PMD_DP(PFM_REG_I, "PMD34", 34, 1ull << 37),
6690 +/* pmd35 */ PMD_DP(PFM_REG_I, "PMD35", 35, 1ull << 37),
6691 +/* pmd36 */ PMD_DP(PFM_REG_I, "PMD36", 36, 1ull << 40),
6692 +/* pmd37 */ PMX_NA,
6693 +/* pmd38 */ PMD_DP(PFM_REG_I, "PMD38", 38, (1ull<<39)|(1ull<<42)),
6694 +/* pmd39 */ PMD_DP(PFM_REG_I, "PMD39", 39, (1ull<<39)|(1ull<<42)),
6695 +/* pmd40 */ PMX_NA,
6696 +/* pmd41 */ PMX_NA,
6697 +/* pmd42 */ PMX_NA,
6698 +/* pmd43 */ PMX_NA,
6699 +/* pmd44 */ PMX_NA,
6700 +/* pmd45 */ PMX_NA,
6701 +/* pmd46 */ PMX_NA,
6702 +/* pmd47 */ PMX_NA,
6703 +/* pmd48 */ PMD_DP(PFM_REG_I, "PMD48", 48, (1ull<<39)|(1ull<<42)),
6704 +/* pmd49 */ PMD_DP(PFM_REG_I, "PMD49", 49, (1ull<<39)|(1ull<<42)),
6705 +/* pmd50 */ PMD_DP(PFM_REG_I, "PMD50", 50, (1ull<<39)|(1ull<<42)),
6706 +/* pmd51 */ PMD_DP(PFM_REG_I, "PMD51", 51, (1ull<<39)|(1ull<<42)),
6707 +/* pmd52 */ PMD_DP(PFM_REG_I, "PMD52", 52, (1ull<<39)|(1ull<<42)),
6708 +/* pmd53 */ PMD_DP(PFM_REG_I, "PMD53", 53, (1ull<<39)|(1ull<<42)),
6709 +/* pmd54 */ PMD_DP(PFM_REG_I, "PMD54", 54, (1ull<<39)|(1ull<<42)),
6710 +/* pmd55 */ PMD_DP(PFM_REG_I, "PMD55", 55, (1ull<<39)|(1ull<<42)),
6711 +/* pmd56 */ PMD_DP(PFM_REG_I, "PMD56", 56, (1ull<<39)|(1ull<<42)),
6712 +/* pmd57 */ PMD_DP(PFM_REG_I, "PMD57", 57, (1ull<<39)|(1ull<<42)),
6713 +/* pmd58 */ PMD_DP(PFM_REG_I, "PMD58", 58, (1ull<<39)|(1ull<<42)),
6714 +/* pmd59 */ PMD_DP(PFM_REG_I, "PMD59", 59, (1ull<<39)|(1ull<<42)),
6715 +/* pmd60 */ PMD_DP(PFM_REG_I, "PMD60", 60, (1ull<<39)|(1ull<<42)),
6716 +/* pmd61 */ PMD_DP(PFM_REG_I, "PMD61", 61, (1ull<<39)|(1ull<<42)),
6717 +/* pmd62 */ PMD_DP(PFM_REG_I, "PMD62", 62, (1ull<<39)|(1ull<<42)),
6718 +/* pmd63 */ PMD_DP(PFM_REG_I, "PMD63", 63, (1ull<<39)|(1ull<<42))
6720 +#define PFM_MONT_NUM_PMDS ARRAY_SIZE(pfm_mont_pmd_desc)
6722 +static int pfm_mont_has_ht;
6724 +static int pfm_mont_pmc_check(struct pfm_context *ctx,
6725 + struct pfm_event_set *set,
6726 + struct pfarg_pmc *req)
6728 + struct pfm_arch_context *ctx_arch;
6729 + u64 val32 = 0, val38 = 0, val41 = 0;
6732 + int ret = 0, check_case1 = 0;
6735 + tmpval = req->reg_value;
6736 + cnum = req->reg_num;
6737 + ctx_arch = pfm_ctx_arch(ctx);
6738 + is_system = ctx->flags.system;
6740 +#define PFM_MONT_PMC_PM_POS6 (1UL<<6)
6741 +#define PFM_MONT_PMC_PM_POS4 (1UL<<4)
6751 + tmpval |= PFM_MONT_PMC_PM_POS6;
6753 + tmpval &= ~PFM_MONT_PMC_PM_POS6;
6761 + if ((req->reg_flags & PFM_REGFL_NO_EMUL64) == 0) {
6762 + if (pfm_mont_has_ht) {
6763 + PFM_INFO("perfmon: Errata 121: PMD10/PMD15 cannot be used to overflow "
6764 + "when threads are on");
6769 + tmpval |= PFM_MONT_PMC_PM_POS6;
6771 + tmpval &= ~PFM_MONT_PMC_PM_POS6;
6776 + if (pfm_mont_has_ht && ((req->reg_value >> 8) & 0x7) == 4) {
6777 + PFM_INFO("perfmon: Errata 120: IP-EAR not available when threads are on");
6781 + tmpval |= PFM_MONT_PMC_PM_POS6;
6783 + tmpval &= ~PFM_MONT_PMC_PM_POS6;
6788 + val38 = set->pmcs[38];
6789 + val41 = set->pmcs[41];
6795 + tmpval |= PFM_MONT_PMC_PM_POS4;
6797 + tmpval &= ~PFM_MONT_PMC_PM_POS4;
6802 + val32 = set->pmcs[32];
6803 + val41 = set->pmcs[41];
6808 + val32 = set->pmcs[32];
6809 + val38 = set->pmcs[38];
6814 + if (check_case1) {
6815 + ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
6816 + && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
6817 + || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
6819 + PFM_DBG("perfmon: invalid config pmc38=0x%lx "
6820 + "pmc41=0x%lx pmc32=0x%lx",
6821 + val38, val41, val32);
6827 + * check if the configuration implicitly activates the use of the
6828 + * debug registers. If so, we ensure that this is possible
6829 + * and that we do not pick up stale values in the HW registers.
6834 + * pmc41 is "active" if:
6835 + * one of the pmc41.cfgdtagXX fields is different from 0x3
6837 + * the corresponding pmc41.en_dbrpXX is set.
6839 + * ctx_fl_use_dbr (dbr not yet used)
6842 + && (tmpval & 0x1e00000000000)
6843 + && (tmpval & 0x18181818) != 0x18181818
6844 + && ctx_arch->flags.use_dbr == 0) {
6845 + PFM_DBG("pmc41=0x%lx active, clearing dbr", tmpval);
6846 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6851 + * we must clear the (instruction) debug registers if:
6852 + * pmc38.ig_ibrpX is 0 (enabled)
6854 + * fl_use_dbr == 0 (dbr not yet used)
6856 + if (cnum == 38 && ((tmpval & 0x492) != 0x492)
6857 + && ctx_arch->flags.use_dbr == 0) {
6858 + PFM_DBG("pmc38=0x%lx active, clearing ibr", tmpval);
6859 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6864 + req->reg_value = tmpval;
6868 +static void pfm_handle_errata(void)
6870 + pfm_mont_has_ht = 1;
6872 + PFM_INFO("activating workaround for errata 120 "
6873 + "(Disable IP-EAR when threads are on)");
6875 + PFM_INFO("activating workaround for Errata 121 "
6876 + "(PMC10-PMC15 cannot be used to overflow"
6877 + " when threads are on");
6879 +static int pfm_mont_probe_pmu(void)
6881 + if (local_cpu_data->family != 0x20)
6885 + * the two errata workarounds must be activated when
6886 + * threads are or can be enabled
6888 + if (is_multithreading_enabled())
6889 + pfm_handle_errata();
6895 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
6897 +static struct pfm_pmu_config pfm_mont_pmu_conf = {
6898 + .pmu_name = "Montecito",
6899 + .counter_width = 47,
6900 + .pmd_desc = pfm_mont_pmd_desc,
6901 + .pmc_desc = pfm_mont_pmc_desc,
6902 + .num_pmc_entries = PFM_MONT_NUM_PMCS,
6903 + .num_pmd_entries = PFM_MONT_NUM_PMDS,
6904 + .pmc_write_check = pfm_mont_pmc_check,
6905 + .probe_pmu = pfm_mont_probe_pmu,
6907 + .pmu_info = &pfm_mont_pmu_info,
6908 + .flags = PFM_PMU_BUILTIN_FLAG,
6909 + .owner = THIS_MODULE
6912 +static int __init pfm_mont_pmu_init_module(void)
6914 + return pfm_pmu_register(&pfm_mont_pmu_conf);
6917 +static void __exit pfm_mont_pmu_cleanup_module(void)
6919 + pfm_pmu_unregister(&pfm_mont_pmu_conf);
6922 +module_init(pfm_mont_pmu_init_module);
6923 +module_exit(pfm_mont_pmu_cleanup_module);
6924 diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
6925 index 1e06d23..b87f445 100644
6926 --- a/arch/mips/Kconfig
6927 +++ b/arch/mips/Kconfig
6928 @@ -1857,6 +1857,8 @@ config SECCOMP
6930 If unsure, say Y. Only embedded should say N here.
6932 +source "arch/mips/perfmon/Kconfig"
6936 config RWSEM_GENERIC_SPINLOCK
6937 diff --git a/arch/mips/Makefile b/arch/mips/Makefile
6938 index 9aab51c..712acf7 100644
6939 --- a/arch/mips/Makefile
6940 +++ b/arch/mips/Makefile
6941 @@ -154,6 +154,12 @@ endif
6948 +core-$(CONFIG_PERFMON) += arch/mips/perfmon/
6953 libs-$(CONFIG_ARC) += arch/mips/fw/arc/
6954 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
6955 index 22fc19b..4467361 100644
6956 --- a/arch/mips/kernel/process.c
6957 +++ b/arch/mips/kernel/process.c
6959 #include <linux/completion.h>
6960 #include <linux/kallsyms.h>
6961 #include <linux/random.h>
6962 +#include <linux/perfmon_kern.h>
6964 #include <asm/asm.h>
6965 #include <asm/bootinfo.h>
6966 @@ -94,6 +95,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
6968 void exit_thread(void)
6970 + pfm_exit_thread();
6973 void flush_thread(void)
6974 @@ -162,6 +164,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
6975 if (clone_flags & CLONE_SETTLS)
6976 ti->tp_value = regs->regs[7];
6978 + pfm_copy_thread(p);
6983 diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
6984 index 5e75a31..e96ddd6 100644
6985 --- a/arch/mips/kernel/scall32-o32.S
6986 +++ b/arch/mips/kernel/scall32-o32.S
6987 @@ -653,6 +653,18 @@ einval: li v0, -EINVAL
6990 sys sys_inotify_init1 1
6991 + sys sys_pfm_create_context 4 /* 4330 */
6992 + sys sys_pfm_write_pmcs 3
6993 + sys sys_pfm_write_pmds 4
6994 + sys sys_pfm_read_pmds 3
6995 + sys sys_pfm_load_context 2
6996 + sys sys_pfm_start 2 /* 4335 */
6997 + sys sys_pfm_stop 1
6998 + sys sys_pfm_restart 1
6999 + sys sys_pfm_create_evtsets 3
7000 + sys sys_pfm_getinfo_evtsets 3
7001 + sys sys_pfm_delete_evtsets 3 /* 4340 */
7002 + sys sys_pfm_unload_context 1
7005 /* We pre-compute the number of _instruction_ bytes needed to
7006 diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
7007 index 3d58204..adb2ba9 100644
7008 --- a/arch/mips/kernel/scall64-64.S
7009 +++ b/arch/mips/kernel/scall64-64.S
7010 @@ -487,4 +487,16 @@ sys_call_table:
7013 PTR sys_inotify_init1
7014 + PTR sys_pfm_create_context
7015 + PTR sys_pfm_write_pmcs /* 5290 */
7016 + PTR sys_pfm_write_pmds
7017 + PTR sys_pfm_read_pmds
7018 + PTR sys_pfm_load_context
7020 + PTR sys_pfm_stop /* 5295 */
7021 + PTR sys_pfm_restart
7022 + PTR sys_pfm_create_evtsets
7023 + PTR sys_pfm_getinfo_evtsets
7024 + PTR sys_pfm_delete_evtsets
7025 + PTR sys_pfm_unload_context /* 5300 */
7026 .size sys_call_table,.-sys_call_table
7027 diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
7028 index da7f1b6..6d12095 100644
7029 --- a/arch/mips/kernel/scall64-n32.S
7030 +++ b/arch/mips/kernel/scall64-n32.S
7031 @@ -400,12 +400,12 @@ EXPORT(sysn32_call_table)
7034 PTR compat_sys_utimensat
7035 - PTR compat_sys_signalfd /* 5280 */
7036 + PTR compat_sys_signalfd /* 6280 */
7040 PTR sys_timerfd_create
7041 - PTR sys_timerfd_gettime /* 5285 */
7042 + PTR sys_timerfd_gettime /* 6285 */
7043 PTR sys_timerfd_settime
7046 @@ -413,4 +413,16 @@ EXPORT(sysn32_call_table)
7047 PTR sys_dup3 /* 5290 */
7049 PTR sys_inotify_init1
7050 + PTR sys_pfm_create_context
7051 + PTR sys_pfm_write_pmcs
7052 + PTR sys_pfm_write_pmds /* 6295 */
7053 + PTR sys_pfm_read_pmds
7054 + PTR sys_pfm_load_context
7057 + PTR sys_pfm_restart /* 6300 */
7058 + PTR sys_pfm_create_evtsets
7059 + PTR sys_pfm_getinfo_evtsets
7060 + PTR sys_pfm_delete_evtsets
7061 + PTR sys_pfm_unload_context
7062 .size sysn32_call_table,.-sysn32_call_table
7063 diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
7064 index d7cd1aa..e77f55a 100644
7065 --- a/arch/mips/kernel/scall64-o32.S
7066 +++ b/arch/mips/kernel/scall64-o32.S
7067 @@ -535,4 +535,16 @@ sys_call_table:
7070 PTR sys_inotify_init1
7071 + PTR sys_pfm_create_context /* 4330 */
7072 + PTR sys_pfm_write_pmcs
7073 + PTR sys_pfm_write_pmds
7074 + PTR sys_pfm_read_pmds
7075 + PTR sys_pfm_load_context
7076 + PTR sys_pfm_start /* 4335 */
7078 + PTR sys_pfm_restart
7079 + PTR sys_pfm_create_evtsets
7080 + PTR sys_pfm_getinfo_evtsets
7081 + PTR sys_pfm_delete_evtsets /* 4340 */
7082 + PTR sys_pfm_unload_context
7083 .size sys_call_table,.-sys_call_table
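Illustration (not part of the patch): until libc grows wrappers, the new entries above are reachable by raw syscall number; the o32 numbering is annotated in the tables (4330 for sys_pfm_create_context, 4335 for sys_pfm_start, and so on). A hedged sketch for stopping monitoring on a session file descriptor:

	#include <unistd.h>
	#include <sys/syscall.h>

	static inline long pfm_stop_raw(int fd)
	{
		/* 4336 = sys_pfm_stop in the o32 tables above */
		return syscall(4336, fd);
	}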
7084 diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
7085 index a4e106c..6a7e60c 100644
7086 --- a/arch/mips/kernel/signal.c
7087 +++ b/arch/mips/kernel/signal.c
7089 #include <linux/unistd.h>
7090 #include <linux/compiler.h>
7091 #include <linux/uaccess.h>
7092 +#include <linux/perfmon_kern.h>
7094 #include <asm/abi.h>
7095 #include <asm/asm.h>
7096 @@ -694,8 +695,11 @@ static void do_signal(struct pt_regs *regs)
7097 * - triggered by the TIF_WORK_MASK flags
7099 asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
7100 - __u32 thread_info_flags)
7101 + __u32 thread_info_flags)
7103 + if (thread_info_flags & _TIF_PERFMON_WORK)
7104 + pfm_handle_work(regs);
7106 /* deal with pending signal delivery */
7107 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
7109 diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
7110 index 1f467d5..163dfe4 100644
7111 --- a/arch/mips/kernel/time.c
7112 +++ b/arch/mips/kernel/time.c
7113 @@ -49,10 +49,11 @@ int update_persistent_clock(struct timespec now)
7114 return rtc_mips_set_mmss(now.tv_sec);
7117 -static int null_perf_irq(void)
7118 +int null_perf_irq(void)
7122 +EXPORT_SYMBOL(null_perf_irq);
7124 int (*perf_irq)(void) = null_perf_irq;
7126 diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
7127 index b602ac6..9cbd75f 100644
7128 --- a/arch/mips/kernel/traps.c
7129 +++ b/arch/mips/kernel/traps.c
7130 @@ -92,17 +92,15 @@ static void show_raw_backtrace(unsigned long reg29)
7131 #ifdef CONFIG_KALLSYMS
7134 - while (!kstack_end(sp)) {
7135 - unsigned long __user *p =
7136 - (unsigned long __user *)(unsigned long)sp++;
7137 - if (__get_user(addr, p)) {
7138 - printk(" (Bad stack address)");
7140 +#define IS_KVA01(a) ((((unsigned long)a) & 0xc0000000) == 0x80000000)
7141 + if (IS_KVA01(sp)) {
7142 + while (!kstack_end(sp)) {
7144 + if (__kernel_text_address(addr))
7145 + print_ip_sym(addr);
7147 - if (__kernel_text_address(addr))
7148 - print_ip_sym(addr);
7154 #ifdef CONFIG_KALLSYMS
7155 diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
7156 index 0b97d47..d8f36b5 100644
7157 --- a/arch/mips/mti-malta/malta-time.c
7158 +++ b/arch/mips/mti-malta/malta-time.c
7160 #include <linux/time.h>
7161 #include <linux/timex.h>
7162 #include <linux/mc146818rtc.h>
7163 +#include <linux/perfmon_kern.h>
7165 #include <asm/mipsregs.h>
7166 #include <asm/mipsmtregs.h>
7167 diff --git a/arch/mips/perfmon/Kconfig b/arch/mips/perfmon/Kconfig
7168 new file mode 100644
7169 index 0000000..b426eea
7171 +++ b/arch/mips/perfmon/Kconfig
7173 +menu "Hardware Performance Monitoring support"
7175 + bool "Perfmon2 performance monitoring interface"
7178 + Enables the perfmon2 interface to access the hardware
7179 + performance counters. See <http://perfmon2.sf.net/> for
7182 +config PERFMON_DEBUG
7183 + bool "Perfmon debugging"
7185 + depends on PERFMON
7187 + Enables perfmon debugging support
7189 +config PERFMON_DEBUG_FS
7190 + bool "Enable perfmon statistics reporting via debugfs"
7192 + depends on PERFMON && DEBUG_FS
7194 + Enable collection and reporting of perfmon timing statistics under
7195 + debugfs. This is used for debugging and performance analysis of the
7196 + subsystem. The debugfs filesystem must be mounted.
7198 +config PERFMON_FLUSH
7199 + bool "Flush sampling buffer when modified"
7200 + depends on PERFMON
7203 + On some MIPS models, cache aliasing may cause invalid
7204 + data to be read from the perfmon sampling buffer. Use this option
7205 + to flush the buffer when it is modified to ensure valid data is
7206 + visible at the user level.
7208 +config PERFMON_ALIGN
7209 + bool "Align sampling buffer to avoid cache aliasing"
7210 + depends on PERFMON
7213 + On some MIPS models, cache aliasing may cause invalid
7214 + data to be read from the perfmon sampling buffer. By forcing a bigger
7215 + page alignment (4-page), one can guarantee the buffer virtual address
7216 + will conflict in the cache with the user level mapping of the buffer
7217 + thereby ensuring a consistent view by user programs.
7227 +config PERFMON_MIPS64
7228 + tristate "Support for MIPS64 hardware performance counters"
7229 + depends on PERFMON
7232 + Enables support for the MIPS64 hardware performance counters
7234 diff --git a/arch/mips/perfmon/Makefile b/arch/mips/perfmon/Makefile
7235 new file mode 100644
7236 index 0000000..153b83f
7238 +++ b/arch/mips/perfmon/Makefile
7240 +obj-$(CONFIG_PERFMON) += perfmon.o
7241 +obj-$(CONFIG_PERFMON_MIPS64) += perfmon_mips64.o
7242 diff --git a/arch/mips/perfmon/perfmon.c b/arch/mips/perfmon/perfmon.c
7243 new file mode 100644
7244 index 0000000..6615a77
7246 +++ b/arch/mips/perfmon/perfmon.c
7249 + * This file implements the MIPS64 specific
7250 + * support for the perfmon2 interface
7252 + * Copyright (c) 2005 Philip J. Mucci
7254 + * based on versions for other architectures:
7255 + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
7256 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7258 + * This program is free software; you can redistribute it and/or
7259 + * modify it under the terms of version 2 of the GNU General Public
7260 + * License as published by the Free Software Foundation.
7262 + * This program is distributed in the hope that it will be useful,
7263 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7264 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7265 + * General Public License for more details.
7267 + * You should have received a copy of the GNU General Public License
7268 + * along with this program; if not, write to the Free Software
7269 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7272 +#include <linux/interrupt.h>
7273 +#include <linux/module.h>
7274 +#include <linux/perfmon_kern.h>
7277 + * collect pending overflowed PMDs. Called from pfm_ctxsw()
7278 + * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
7279 + * and set->npend_ovfls. Interrupts are masked
7281 +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
7283 + u64 new_val, wmask;
7284 + u64 *used_mask, *intr_pmds;
7285 + u64 mask[PFM_PMD_BV];
7286 + unsigned int i, max;
7288 + max = ctx->regs.max_intr_pmd;
7289 + intr_pmds = ctx->regs.intr_pmds;
7290 + used_mask = set->used_pmds;
7292 + wmask = 1ULL << pfm_pmu_conf->counter_width;
7294 + bitmap_and(cast_ulp(mask),
7295 + cast_ulp(intr_pmds),
7296 + cast_ulp(used_mask),
7300 + * check all PMD that can generate interrupts
7301 + * (that includes counters)
7303 + for (i = 0; i < max; i++) {
7304 + if (test_bit(i, mask)) {
7305 + new_val = pfm_arch_read_pmd(ctx, i);
7307 + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
7308 + i, (unsigned long long)new_val,
7309 + (new_val&wmask) ? 1 : 0);
7311 + if (new_val & wmask) {
7312 + __set_bit(i, set->povfl_pmds);
7313 + set->npend_ovfls++;
7319 +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
7320 + struct pfm_event_set *set)
7322 + unsigned int i, max;
7324 + max = ctx->regs.max_pmc;
7327 + * clear enable bits, assume all pmcs are enable pmcs
7329 + for (i = 0; i < max; i++) {
7330 + if (test_bit(i, set->used_pmcs))
7331 + pfm_arch_write_pmc(ctx, i, 0);
7334 + if (set->npend_ovfls)
7337 + __pfm_get_ovfl_pmds(ctx, set);
7341 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
7342 + * Context is locked. Interrupts are masked. Monitoring is active.
7343 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
7346 + * must stop monitoring for the task
7349 + * non-zero : did not save PMDs (as part of stopping the PMU)
7350 + * 0 : saved PMDs (no need to save them in caller)
7352 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
7355 + * disable lazy restore of PMC registers.
7357 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
7360 + * if masked, monitoring is stopped, thus there is no
7361 + * need to stop the PMU again and there is no need to
7362 + * check for pending overflows. This is not just an
7363 + * optimization, this is also for correctness as you
7364 + * may end up detecting overflows twice.
7366 + if (ctx->state == PFM_CTX_MASKED)
7369 + pfm_stop_active(task, ctx, ctx->active_set);
7375 + * Called from pfm_stop() and pfm_ctxsw()
7376 + * Interrupts are masked. Context is locked. Set is the active set.
7379 + * task is not necessarily current. If not current task, then
7380 + * task is guaranteed stopped and off any cpu. Access to PMU
7381 + * is not guaranteed. Interrupts are masked. Context is locked.
7382 + * Set is the active set.
7384 + * For system-wide:
7387 + * must disable active monitoring. ctx cannot be NULL
7389 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
7392 + * no need to go through stop_save()
7393 + * if we are already stopped
7395 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
7399 + * stop live registers and collect pending overflow
7401 + if (task == current)
7402 + pfm_stop_active(task, ctx, ctx->active_set);
7406 + * called from pfm_start() or pfm_ctxsw() when idle task and
7407 + * EXCL_IDLE is on.
7409 + * Interrupts are masked. Context is locked. Set is the active set.
7412 + * Task is not necessarily current. If not current task, then task
7413 + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
7415 + * For system-wide:
7416 + * task is always current
7418 + * must enable active monitoring.
7420 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
7422 + struct pfm_event_set *set;
7423 + unsigned int i, max_pmc;
7425 + if (task != current)
7428 + set = ctx->active_set;
7429 + max_pmc = ctx->regs.max_pmc;
7431 + for (i = 0; i < max_pmc; i++) {
7432 + if (test_bit(i, set->used_pmcs))
7433 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
7438 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
7439 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
7440 + * context is locked. Interrupts are masked. set cannot be NULL.
7441 + * Access to the PMU is guaranteed.
7443 + * function must restore all PMD registers from set.
7445 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
7447 + u64 ovfl_mask, val;
7450 + unsigned int max_pmd;
7452 + max_pmd = ctx->regs.max_pmd;
7453 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
7454 + impl_pmds = ctx->regs.pmds;
7457 + * must restore all pmds to avoid leaking
7458 + * information to user.
7460 + for (i = 0; i < max_pmd; i++) {
7462 + if (test_bit(i, impl_pmds) == 0)
7465 + val = set->pmds[i].value;
7468 + * set upper bits for counter to ensure
7469 + * overflow will trigger
7473 + pfm_arch_write_pmd(ctx, i, val);
7478 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
7479 + * pfm_context_load_sys(), pfm_ctxsw().
7480 + * Context is locked. Interrupts are masked. set cannot be NULL.
7481 + * Access to the PMU is guaranteed.
7483 + * function must restore all PMC registers from set, if needed.
7485 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
7488 + unsigned int i, max_pmc;
7490 + max_pmc = ctx->regs.max_pmc;
7491 + impl_pmcs = ctx->regs.pmcs;
7494 + * - by default no PMC measures anything
7495 + * - on ctxswout, all used PMCs are disabled (cccr enable bit cleared)
7496 + * hence when masked we do not need to restore anything
7498 + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
7502 + * restore all pmcs
7504 + for (i = 0; i < max_pmc; i++)
7505 + if (test_bit(i, impl_pmcs))
7506 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
7509 +char *pfm_arch_get_pmu_module_name(void)
7511 + switch (cpu_data->cputype) {
7514 +#if defined(CPU_74K)
7525 + return "perfmon_mips64";
7532 +int perfmon_perf_irq(void)
7534 + /* BLATANTLY STOLEN FROM OPROFILE, then modified */
7535 + struct pt_regs *regs;
7536 + unsigned int counters = pfm_pmu_conf->regs_all.max_pmc;
7537 + unsigned int control;
7538 + unsigned int counter;
7540 + regs = get_irq_regs();
7541 + switch (counters) {
7542 +#define HANDLE_COUNTER(n) \
7544 + control = read_c0_perfctrl ## n(); \
7545 + counter = read_c0_perfcntr ## n(); \
7546 + if ((control & MIPS64_PMC_INT_ENABLE_MASK) && \
7547 + (counter & MIPS64_PMD_INTERRUPT)) { \
7548 + pfm_interrupt_handler(instruction_pointer(regs),\
7560 +EXPORT_SYMBOL(perfmon_perf_irq);
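In __pfm_get_ovfl_pmds() above, a PMD is considered overflowed when the bit at counter_width is set (wmask = 1ULL << counter_width; the MIPS64 description module below uses counter_width = 31). A small arithmetic sketch of the usual seeding trick — an assumption here, the patch does not spell it out — showing how a sampling period of N events makes that bit flip:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const uint64_t wmask = 1ULL << 31;   /* overflow/interrupt bit of a 31-bit counter */
            const uint64_t n = 100000;           /* desired sampling period */
            uint64_t pmd = wmask - n;            /* value seeded into the counter */

            pmd += n;                            /* hardware counts n events */
            printf("pending overflow: %d\n", (pmd & wmask) ? 1 : 0);   /* prints 1 */
            return 0;
    }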
7561 diff --git a/arch/mips/perfmon/perfmon_mips64.c b/arch/mips/perfmon/perfmon_mips64.c
7562 new file mode 100644
7563 index 0000000..78cb43d
7565 +++ b/arch/mips/perfmon/perfmon_mips64.c
7568 + * This file contains the MIPS64 and descendant PMU register description tables
7569 + * and pmc checker used by perfmon.c.
7571 + * Copyright (c) 2005 Philip Mucci
7573 + * Based on perfmon_p6.c:
7574 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
7575 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7577 + * This program is free software; you can redistribute it and/or
7578 + * modify it under the terms of version 2 of the GNU General Public
7579 + * License as published by the Free Software Foundation.
7581 + * This program is distributed in the hope that it will be useful,
7582 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7583 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7584 + * General Public License for more details.
7586 + * You should have received a copy of the GNU General Public License
7587 + * along with this program; if not, write to the Free Software
7588 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7591 +#include <linux/module.h>
7592 +#include <linux/perfmon_kern.h>
7594 +MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>");
7595 +MODULE_DESCRIPTION("MIPS64 PMU description tables");
7596 +MODULE_LICENSE("GPL");
7601 + * RSVD: reserved bits must be 1
7603 +#define PFM_MIPS64_PMC_RSVD 0xfffffffffffff810ULL
7604 +#define PFM_MIPS64_PMC_VAL (1ULL<<4)
7606 +extern int null_perf_irq(struct pt_regs *regs);
7607 +extern int (*perf_irq)(struct pt_regs *regs);
7608 +extern int perfmon_perf_irq(struct pt_regs *regs);
7610 +static struct pfm_arch_pmu_info pfm_mips64_pmu_info;
7612 +static struct pfm_regmap_desc pfm_mips64_pmc_desc[] = {
7613 +/* pmc0 */ PMC_D(PFM_REG_I64, "CP0_25_0", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 0),
7614 +/* pmc1 */ PMC_D(PFM_REG_I64, "CP0_25_1", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 1),
7615 +/* pmc2 */ PMC_D(PFM_REG_I64, "CP0_25_2", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 2),
7616 +/* pmc3 */ PMC_D(PFM_REG_I64, "CP0_25_3", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 3)
7618 +#define PFM_MIPS64_NUM_PMCS ARRAY_SIZE(pfm_mips64_pmc_desc)
7620 +static struct pfm_regmap_desc pfm_mips64_pmd_desc[] = {
7621 +/* pmd0 */ PMD_D(PFM_REG_C, "CP0_25_0", 0),
7622 +/* pmd1 */ PMD_D(PFM_REG_C, "CP0_25_1", 1),
7623 +/* pmd2 */ PMD_D(PFM_REG_C, "CP0_25_2", 2),
7624 +/* pmd3 */ PMD_D(PFM_REG_C, "CP0_25_3", 3)
7626 +#define PFM_MIPS64_NUM_PMDS ARRAY_SIZE(pfm_mips64_pmd_desc)
7628 +static int pfm_mips64_probe_pmu(void)
7630 + struct cpuinfo_mips *c = &current_cpu_data;
7632 + switch (c->cputype) {
7635 +#if defined(CPU_74K)
7649 + PFM_INFO("Unknown cputype 0x%x", c->cputype);
7655 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
7657 +static struct pfm_pmu_config pfm_mips64_pmu_conf = {
7658 + .pmu_name = "MIPS", /* placeholder */
7659 + .counter_width = 31,
7660 + .pmd_desc = pfm_mips64_pmd_desc,
7661 + .pmc_desc = pfm_mips64_pmc_desc,
7662 + .num_pmc_entries = PFM_MIPS64_NUM_PMCS,
7663 + .num_pmd_entries = PFM_MIPS64_NUM_PMDS,
7664 + .probe_pmu = pfm_mips64_probe_pmu,
7665 + .flags = PFM_PMU_BUILTIN_FLAG,
7666 + .owner = THIS_MODULE,
7667 + .pmu_info = &pfm_mips64_pmu_info
7670 +static inline int n_counters(void)
7672 + if (!(read_c0_config1() & MIPS64_CONFIG_PMC_MASK))
7674 + if (!(read_c0_perfctrl0() & MIPS64_PMC_CTR_MASK))
7676 + if (!(read_c0_perfctrl1() & MIPS64_PMC_CTR_MASK))
7678 + if (!(read_c0_perfctrl2() & MIPS64_PMC_CTR_MASK))
7683 +static int __init pfm_mips64_pmu_init_module(void)
7685 + struct cpuinfo_mips *c = &current_cpu_data;
7689 + switch (c->cputype) {
7691 + pfm_mips64_pmu_conf.pmu_name = "MIPS5KC";
7694 + pfm_mips64_pmu_conf.pmu_name = "MIPSR12000";
7697 + pfm_mips64_pmu_conf.pmu_name = "MIPS20KC";
7700 + pfm_mips64_pmu_conf.pmu_name = "MIPS24K";
7703 + pfm_mips64_pmu_conf.pmu_name = "MIPS25KF";
7706 + pfm_mips64_pmu_conf.pmu_name = "SB1";
7709 + pfm_mips64_pmu_conf.pmu_name = "SB1A";
7713 + pfm_mips64_pmu_conf.pmu_name = "MIPS34K";
7715 +#if defined(CPU_74K)
7717 + pfm_mips64_pmu_conf.pmu_name = "MIPS74K";
7722 + PFM_INFO("Unknown cputype 0x%x", c->cputype);
7726 + /* The R14k and older performance counters have to */
7727 + /* be hard-coded, as there is no support for auto-detection */
7728 + if ((c->cputype == CPU_R12000) || (c->cputype == CPU_R14000))
7730 + else if (c->cputype == CPU_R10000)
7733 + num = n_counters();
7736 + PFM_INFO("cputype 0x%x has no counters", c->cputype);
7739 + /* mark remaining counters unavailable */
7740 + for (i = num; i < PFM_MIPS64_NUM_PMCS; i++)
7741 + pfm_mips64_pmc_desc[i].type = PFM_REG_NA;
7743 + for (i = num; i < PFM_MIPS64_NUM_PMDS; i++)
7744 + pfm_mips64_pmd_desc[i].type = PFM_REG_NA;
7746 + /* set the PMC_RSVD mask */
7747 + switch (c->cputype) {
7751 + /* 4-bits for event */
7752 + temp_mask = 0xfffffffffffffe10ULL;
7756 + /* 5-bits for event */
7757 + temp_mask = 0xfffffffffffffc10ULL;
7760 + /* 6-bits for event */
7761 + temp_mask = 0xfffffffffffff810ULL;
7763 + for (i = 0; i < PFM_MIPS64_NUM_PMCS; i++)
7764 + pfm_mips64_pmc_desc[i].rsvd_msk = temp_mask;
7766 + pfm_mips64_pmu_conf.num_pmc_entries = num;
7767 + pfm_mips64_pmu_conf.num_pmd_entries = num;
7769 + pfm_mips64_pmu_info.pmu_style = c->cputype;
7771 + ret = pfm_pmu_register(&pfm_mips64_pmu_conf);
7773 + perf_irq = perfmon_perf_irq;
7777 +static void __exit pfm_mips64_pmu_cleanup_module(void)
7779 + pfm_pmu_unregister(&pfm_mips64_pmu_conf);
7780 + perf_irq = null_perf_irq;
7783 +module_init(pfm_mips64_pmu_init_module);
7784 +module_exit(pfm_mips64_pmu_cleanup_module);
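The switch above adjusts rsvd_msk to the event-field width of the detected CPU; only the complement of rsvd_msk is user-writable, and bit 4 is preset by PFM_MIPS64_PMC_VAL. A stand-alone illustration of the 5-bit-event case (the bit roles noted in the comment follow the standard MIPS PerfCtl layout and are an assumption, not taken from this patch):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t rsvd_5bit = 0xfffffffffffffc10ULL;   /* 24K/34K/74K case above */
            uint64_t writable  = ~rsvd_5bit;              /* 0x3ef */

            /* presumably bits 0-3 are the EXL/K/S/U mode selectors and bits 5-9 the
             * 5-bit event select; bit 4 (interrupt enable) stays reserved and is
             * preset by PFM_MIPS64_PMC_VAL (1ULL << 4). */
            printf("writable control bits: 0x%llx\n", (unsigned long long)writable);
            return 0;
    }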
7785 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
7786 index 587da5e..a411389 100644
7787 --- a/arch/powerpc/Kconfig
7788 +++ b/arch/powerpc/Kconfig
7789 @@ -230,6 +230,8 @@ source "init/Kconfig"
7790 source "arch/powerpc/sysdev/Kconfig"
7791 source "arch/powerpc/platforms/Kconfig"
7793 +source "arch/powerpc/perfmon/Kconfig"
7795 menu "Kernel options"
7798 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
7799 index c6be19e..7ea20cb 100644
7800 --- a/arch/powerpc/Makefile
7801 +++ b/arch/powerpc/Makefile
7802 @@ -146,6 +146,7 @@ core-y += arch/powerpc/kernel/ \
7803 arch/powerpc/platforms/
7804 core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/
7805 core-$(CONFIG_XMON) += arch/powerpc/xmon/
7806 +core-$(CONFIG_PERFMON) += arch/powerpc/perfmon/
7807 core-$(CONFIG_KVM) += arch/powerpc/kvm/
7809 drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
7810 diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
7811 index 5ab7d7f..88cb533 100644
7812 --- a/arch/powerpc/include/asm/Kbuild
7813 +++ b/arch/powerpc/include/asm/Kbuild
7814 @@ -21,6 +21,7 @@ header-y += resource.h
7815 header-y += sigcontext.h
7816 header-y += statfs.h
7818 +header-y += perfmon.h
7820 unifdef-y += bootx.h
7821 unifdef-y += byteorder.h
7822 diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
7823 index 8066eed..981db26 100644
7824 --- a/arch/powerpc/include/asm/cell-pmu.h
7825 +++ b/arch/powerpc/include/asm/cell-pmu.h
7828 /* Macros for the pm_status register. */
7829 #define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7)))
7830 +#define CBE_PM_OVERFLOW_CTRS(pm_status) (((pm_status) >> 24) & 0xff)
7831 +#define CBE_PM_ALL_OVERFLOW_INTR 0xff000000
7832 +#define CBE_PM_INTERVAL_INTR 0x00800000
7833 +#define CBE_PM_TRACE_BUFFER_FULL_INTR 0x00400000
7834 +#define CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR 0x00200000
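The new macros decode the Cell pm_status register that the interrupt path reads; a stand-alone illustration with a made-up status value:

    #include <stdint.h>
    #include <stdio.h>

    #define CBE_PM_OVERFLOW_CTRS(pm_status) (((pm_status) >> 24) & 0xff)
    #define CBE_PM_INTERVAL_INTR            0x00800000

    int main(void)
    {
            /* made-up status: counter 0 (bit 31) and counter 7 (bit 24) overflowed,
             * and the interval timer (bit 23) fired */
            uint32_t pm_status = 0x81800000;

            printf("overflowed counters: 0x%02x\n",
                   (unsigned)CBE_PM_OVERFLOW_CTRS(pm_status));          /* 0x81 */
            printf("interval interrupt:  %d\n",
                   (pm_status & CBE_PM_INTERVAL_INTR) ? 1 : 0);         /* 1 */
            return 0;
    }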
7838 diff --git a/arch/powerpc/include/asm/cell-regs.h b/arch/powerpc/include/asm/cell-regs.h
7839 index fd6fd00..580786d 100644
7840 --- a/arch/powerpc/include/asm/cell-regs.h
7841 +++ b/arch/powerpc/include/asm/cell-regs.h
7842 @@ -117,8 +117,9 @@ struct cbe_pmd_regs {
7843 u8 pad_0x0c1c_0x0c20 [4]; /* 0x0c1c */
7844 #define CBE_PMD_FIR_MODE_M8 0x00800
7845 u64 fir_enable_mask; /* 0x0c20 */
7847 - u8 pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28]; /* 0x0c28 */
7848 + u8 pad_0x0c28_0x0c98 [0x0c98 - 0x0c28]; /* 0x0c28 */
7849 + u64 on_ramp_trace; /* 0x0c98 */
7850 + u64 pad_0x0ca0; /* 0x0ca0 */
7851 u64 ras_esc_0; /* 0x0ca8 */
7852 u8 pad_0x0cb0_0x1000 [0x1000 - 0x0cb0]; /* 0x0cb0 */
7854 @@ -218,7 +219,11 @@ extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu);
7857 struct cbe_mic_tm_regs {
7858 - u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */
7859 + u8 pad_0x0000_0x0010[0x0010 - 0x0000]; /* 0x0000 */
7861 + u64 MBL_debug; /* 0x0010 */
7863 + u8 pad_0x0018_0x0040[0x0040 - 0x0018]; /* 0x0018 */
7865 u64 mic_ctl_cnfg2; /* 0x0040 */
7866 #define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL
7867 @@ -303,6 +308,25 @@ struct cbe_mic_tm_regs {
7868 extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np);
7869 extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
7873 + * PPE Privileged MMIO Registers definition. (offset 0x500000 - 0x500fff)
7876 +struct cbe_ppe_priv_regs {
7877 + u8 pad_0x0000_0x0858[0x0858 - 0x0000]; /* 0x0000 */
7879 + u64 L2_debug1; /* 0x0858 */
7881 + u8 pad_0x0860_0x0958[0x0958 - 0x0860]; /* 0x0860 */
7883 + u64 ciu_dr1; /* 0x0958 */
7885 + u8 pad_0x0960_0x1000[0x1000 - 0x0960]; /* 0x0960 */
7888 +extern struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu);
7890 /* some utility functions to deal with SMT */
7891 extern u32 cbe_get_hw_thread_id(int cpu);
7892 extern u32 cbe_cpu_to_node(int cpu);
7893 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
7894 index 6493a39..ba9ead4 100644
7895 --- a/arch/powerpc/include/asm/paca.h
7896 +++ b/arch/powerpc/include/asm/paca.h
7897 @@ -97,6 +97,10 @@ struct paca_struct {
7898 u8 soft_enabled; /* irq soft-enable flag */
7899 u8 hard_enabled; /* set if irqs are enabled in MSR */
7900 u8 io_sync; /* writel() needs spin_unlock sync */
7901 +#ifdef CONFIG_PERFMON
7902 + u8 pmu_except_pending; /* PMU exception occurred while soft
7906 /* Stuff for accurate time accounting */
7907 u64 user_time; /* accumulated usermode TB ticks */
7908 diff --git a/arch/powerpc/include/asm/perfmon.h b/arch/powerpc/include/asm/perfmon.h
7909 new file mode 100644
7910 index 0000000..da0ae3b
7912 +++ b/arch/powerpc/include/asm/perfmon.h
7915 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
7916 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7918 + * This file contains powerpc specific definitions for the perfmon
7921 + * This file MUST never be included directly. Use linux/perfmon.h.
7923 + * This program is free software; you can redistribute it and/or
7924 + * modify it under the terms of version 2 of the GNU General Public
7925 + * License as published by the Free Software Foundation.
7927 + * This program is distributed in the hope that it will be useful,
7928 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7929 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7930 + * General Public License for more details.
7932 + * You should have received a copy of the GNU General Public License
7933 + * along with this program; if not, write to the Free Software
7934 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7937 +#ifndef _ASM_POWERPC_PERFMON_H_
7938 +#define _ASM_POWERPC_PERFMON_H_
7941 + * arch-specific user visible interface definitions
7943 +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
7944 +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
7946 +#endif /* _ASM_POWERPC_PERFMON_H_ */
7947 diff --git a/arch/powerpc/include/asm/perfmon_kern.h b/arch/powerpc/include/asm/perfmon_kern.h
7948 new file mode 100644
7949 index 0000000..65ec984
7951 +++ b/arch/powerpc/include/asm/perfmon_kern.h
7954 + * Copyright (c) 2005 David Gibson, IBM Corporation.
7956 + * Based on other versions:
7957 + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
7958 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7960 + * This file contains powerpc specific definitions for the perfmon
7963 + * This program is free software; you can redistribute it and/or
7964 + * modify it under the terms of version 2 of the GNU General Public
7965 + * License as published by the Free Software Foundation.
7967 + * This program is distributed in the hope that it will be useful,
7968 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7969 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7970 + * General Public License for more details.
7972 + * You should have received a copy of the GNU General Public License
7973 + * along with this program; if not, write to the Free Software
7974 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7977 +#ifndef _ASM_POWERPC_PERFMON_KERN_H_
7978 +#define _ASM_POWERPC_PERFMON_KERN_H_
7982 +#ifdef CONFIG_PERFMON
7984 +#include <asm/pmc.h>
7985 +#include <asm/unistd.h>
7987 +#define HID0_PMC5_6_GR_MODE (1UL << (63 - 40))
7989 +enum powerpc_pmu_type {
7990 + PFM_POWERPC_PMU_NONE,
7991 + PFM_POWERPC_PMU_604,
7992 + PFM_POWERPC_PMU_604e,
7993 + PFM_POWERPC_PMU_750, /* XXX: Minor event set diffs between IBM and Moto. */
7994 + PFM_POWERPC_PMU_7400,
7995 + PFM_POWERPC_PMU_7450,
7996 + PFM_POWERPC_PMU_POWER4,
7997 + PFM_POWERPC_PMU_POWER5,
7998 + PFM_POWERPC_PMU_POWER5p,
7999 + PFM_POWERPC_PMU_POWER6,
8000 + PFM_POWERPC_PMU_CELL,
8003 +struct pfm_arch_pmu_info {
8004 + enum powerpc_pmu_type pmu_style;
8006 + void (*write_pmc)(unsigned int cnum, u64 value);
8007 + void (*write_pmd)(unsigned int cnum, u64 value);
8009 + u64 (*read_pmd)(unsigned int cnum);
8011 + void (*enable_counters)(struct pfm_context *ctx,
8012 + struct pfm_event_set *set);
8013 + void (*disable_counters)(struct pfm_context *ctx,
8014 + struct pfm_event_set *set);
8016 + void (*irq_handler)(struct pt_regs *regs, struct pfm_context *ctx);
8017 + void (*get_ovfl_pmds)(struct pfm_context *ctx,
8018 + struct pfm_event_set *set);
8020 + /* The following routines are optional. */
8021 + void (*restore_pmcs)(struct pfm_context *ctx,
8022 + struct pfm_event_set *set);
8023 + void (*restore_pmds)(struct pfm_context *ctx,
8024 + struct pfm_event_set *set);
8026 + int (*ctxswout_thread)(struct task_struct *task,
8027 + struct pfm_context *ctx,
8028 + struct pfm_event_set *set);
8029 + void (*ctxswin_thread)(struct task_struct *task,
8030 + struct pfm_context *ctx,
8031 + struct pfm_event_set *set);
8032 + int (*load_context)(struct pfm_context *ctx);
8033 + void (*unload_context)(struct pfm_context *ctx);
8034 + int (*acquire_pmu)(u64 *unavail_pmcs, u64 *unavail_pmds);
8035 + void (*release_pmu)(void);
8036 + void *platform_info;
8037 + void (*resend_irq)(struct pfm_context *ctx);
8040 +#ifdef CONFIG_PPC32
8041 +#define PFM_ARCH_PMD_STK_ARG 6 /* conservative value */
8042 +#define PFM_ARCH_PMC_STK_ARG 6 /* conservative value */
8044 +#define PFM_ARCH_PMD_STK_ARG 8 /* conservative value */
8045 +#define PFM_ARCH_PMC_STK_ARG 8 /* conservative value */
8048 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
8050 + struct pfm_arch_pmu_info *arch_info;
8052 + arch_info = pfm_pmu_info();
8053 + arch_info->resend_irq(ctx);
8056 +static inline void pfm_arch_serialize(void)
8059 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
8060 + unsigned int cnum,
8063 + struct pfm_arch_pmu_info *arch_info;
8065 + arch_info = pfm_pmu_info();
8068 + * we only write to the actual register when monitoring is
8069 + * active (pfm_start was issued)
8071 + if (ctx && ctx->flags.started == 0)
8074 + BUG_ON(!arch_info->write_pmc);
8076 + arch_info->write_pmc(cnum, value);
8079 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
8080 + unsigned int cnum, u64 value)
8082 + struct pfm_arch_pmu_info *arch_info;
8084 + arch_info = pfm_pmu_info();
8086 + value &= pfm_pmu_conf->ovfl_mask;
8088 + BUG_ON(!arch_info->write_pmd);
8090 + arch_info->write_pmd(cnum, value);
8093 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
8095 + struct pfm_arch_pmu_info *arch_info;
8097 + arch_info = pfm_pmu_info();
8099 + BUG_ON(!arch_info->read_pmd);
8101 + return arch_info->read_pmd(cnum);
8105 + * For some CPUs, the upper bits of a counter must be set in order for the
8106 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
8107 + * and the upper bits are cleared. This function may be used to set them back.
8109 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
8110 + unsigned int cnum)
8112 + u64 val = pfm_arch_read_pmd(ctx, cnum);
8114 + /* This masks out overflow bit 31 */
8115 + pfm_arch_write_pmd(ctx, cnum, val);
8119 + * At certain points, perfmon needs to know if monitoring has been
8120 + * explicitly started/stopped by user via pfm_start/pfm_stop. The
8121 + * information is tracked in flags.started. However on certain
8122 + * architectures, it may be possible to start/stop directly from
8123 + * user level with a single assembly instruction bypassing
8124 + * the kernel. This function must be used to determine, by
8125 + * arch-specific means, whether monitoring is actually started/stopped.
8127 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
8129 + return ctx->flags.started;
8132 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
8133 + struct pfm_context *ctx)
8136 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
8137 + struct pfm_context *ctx)
8140 +void pfm_arch_init_percpu(void);
8141 +int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
8142 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
8143 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
8144 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
8145 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
8146 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
8147 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
8148 +void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, struct pfm_event_set *set);
8149 +int pfm_arch_get_ovfl_pmds(struct pfm_context *ctx,
8150 + struct pfm_event_set *set);
8151 +char *pfm_arch_get_pmu_module_name(void);
8153 + * called from __pfm_interrupt_handler(). ctx is not NULL.
8154 + * ctx is locked. PMU interrupt is masked.
8156 + * must stop all monitoring to ensure handler has consistent view.
8157 + * must collect overflowed PMDs bitmask into povfls_pmds and
8158 + * npend_ovfls. If no interrupt detected then npend_ovfls
8159 + * must be set to zero.
8161 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, struct pfm_event_set *set)
8163 + pfm_arch_stop(current, ctx);
8166 +void powerpc_irq_handler(struct pt_regs *regs);
8169 + * unfreeze PMU from pfm_do_interrupt_handler()
8170 + * ctx may be NULL for spurious
8172 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
8174 + struct pfm_arch_pmu_info *arch_info;
8179 + PFM_DBG_ovfl("state=%d", ctx->state);
8181 + ctx->flags.started = 1;
8183 + if (ctx->state == PFM_CTX_MASKED)
8186 + arch_info = pfm_pmu_info();
8187 + BUG_ON(!arch_info->enable_counters);
8188 + arch_info->enable_counters(ctx, ctx->active_set);
8192 + * PowerPC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
8193 + * this routine needs to do it when switching sets on overflow
8195 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
8196 + struct pfm_event_set *set)
8198 + pfm_save_pmds(ctx, set);
8202 + * this function is called from the PMU interrupt handler ONLY.
8203 + * On PPC, the PMU is frozen via arch_stop, masking would be implemented
8204 + * via arch-stop as well. Given that the PMU is already stopped when
8205 + * entering the interrupt handler, we do not need to stop it again, so
8206 + * this function is a nop.
8208 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
8209 + struct pfm_event_set *set)
8213 + * Simply need to start the context in order to unmask.
8215 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
8216 + struct pfm_event_set *set)
8218 + pfm_arch_start(current, ctx);
8222 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
8227 +static inline int pfm_arch_context_create(struct pfm_context *ctx,
8233 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
8236 +/* not necessary on PowerPC */
8237 +static inline void pfm_cacheflush(void *addr, unsigned int len)
8241 + * function called from pfm_setfl_sane(). Context is locked
8242 + * and interrupts are masked.
8243 + * The value of flags is the value of ctx_flags as passed by
8246 + * function must check arch-specific set flags.
8248 + * 1 when flags are valid
8251 +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
8256 +static inline int pfm_arch_init(void)
8261 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
8263 + struct pfm_arch_pmu_info *arch_info;
8266 + arch_info = pfm_pmu_info();
8267 + if (arch_info->load_context)
8268 + rc = arch_info->load_context(ctx);
8273 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
8275 + struct pfm_arch_pmu_info *arch_info;
8277 + arch_info = pfm_pmu_info();
8278 + if (arch_info->unload_context)
8279 + arch_info->unload_context(ctx);
8282 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
8284 + struct pfm_arch_pmu_info *arch_info;
8287 + arch_info = pfm_pmu_info();
8288 + if (arch_info->acquire_pmu) {
8289 + rc = arch_info->acquire_pmu(unavail_pmcs, unavail_pmds);
8294 + return reserve_pmc_hardware(powerpc_irq_handler);
8297 +static inline void pfm_arch_pmu_release(void)
8299 + struct pfm_arch_pmu_info *arch_info;
8301 + arch_info = pfm_pmu_info();
8302 + if (arch_info->release_pmu)
8303 + arch_info->release_pmu();
8305 + release_pmc_hardware();
8308 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
8311 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
8314 +static inline int pfm_arch_get_base_syscall(void)
8316 + return __NR_pfm_create_context;
8319 +struct pfm_arch_context {
8320 + /* Cell: Most recent value of the pm_status
8321 + * register read by the interrupt handler.
8323 + * Interrupt handler sets last_read_updated if it
8324 + * just read and updated last_read_pm_status
8326 + u32 last_read_pm_status;
8327 + u32 last_read_updated;
8328 + u64 powergs_pmc5, powergs_pmc6;
8329 + u64 delta_tb, delta_tb_start;
8330 + u64 delta_purr, delta_purr_start;
8333 +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
8335 + * PowerPC does not need extra alignment requirements for the sampling buffer
8337 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
8339 +#endif /* CONFIG_PERFMON */
8341 +#endif /* __KERNEL__ */
8342 +#endif /* _ASM_POWERPC_PERFMON_KERN_H_ */
8343 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
8344 index c6d1ab6..a9f3ad0 100644
8345 --- a/arch/powerpc/include/asm/reg.h
8346 +++ b/arch/powerpc/include/asm/reg.h
8348 #define PV_POWER5 0x003A
8349 #define PV_POWER5p 0x003B
8350 #define PV_970FX 0x003C
8351 +#define PV_POWER6 0x003E
8352 #define PV_630 0x0040
8353 #define PV_630p 0x0041
8354 #define PV_970MP 0x0044
8355 diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
8356 index f6cc7a4..0164841 100644
8357 --- a/arch/powerpc/include/asm/systbl.h
8358 +++ b/arch/powerpc/include/asm/systbl.h
8359 @@ -322,3 +322,15 @@ SYSCALL_SPU(epoll_create1)
8362 SYSCALL(inotify_init1)
8363 +SYSCALL(pfm_create_context)
8364 +SYSCALL(pfm_write_pmcs)
8365 +SYSCALL(pfm_write_pmds)
8366 +SYSCALL(pfm_read_pmds)
8367 +SYSCALL(pfm_load_context)
8370 +SYSCALL(pfm_restart)
8371 +SYSCALL(pfm_create_evtsets)
8372 +SYSCALL(pfm_getinfo_evtsets)
8373 +SYSCALL(pfm_delete_evtsets)
8374 +SYSCALL(pfm_unload_context)
8375 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
8376 index 9665a26..6cda9f9 100644
8377 --- a/arch/powerpc/include/asm/thread_info.h
8378 +++ b/arch/powerpc/include/asm/thread_info.h
8379 @@ -130,10 +130,12 @@ static inline struct thread_info *current_thread_info(void)
8380 #define _TIF_FREEZE (1<<TIF_FREEZE)
8381 #define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
8382 #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
8383 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
8384 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
8385 #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
8387 #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
8388 - _TIF_NOTIFY_RESUME)
8389 + _TIF_NOTIFY_RESUME | _TIF_PERFMON_WORK)
8390 #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
8392 /* Bits in local_flags */
8393 diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
8394 index e07d0c7..6226cba 100644
8395 --- a/arch/powerpc/include/asm/unistd.h
8396 +++ b/arch/powerpc/include/asm/unistd.h
8397 @@ -341,10 +341,22 @@
8398 #define __NR_dup3 316
8399 #define __NR_pipe2 317
8400 #define __NR_inotify_init1 318
8401 +#define __NR_pfm_create_context 319
8402 +#define __NR_pfm_write_pmcs 320
8403 +#define __NR_pfm_write_pmds 321
8404 +#define __NR_pfm_read_pmds 322
8405 +#define __NR_pfm_load_context 323
8406 +#define __NR_pfm_start 324
8407 +#define __NR_pfm_stop 325
8408 +#define __NR_pfm_restart 326
8409 +#define __NR_pfm_create_evtsets 327
8410 +#define __NR_pfm_getinfo_evtsets 328
8411 +#define __NR_pfm_delete_evtsets 329
8412 +#define __NR_pfm_unload_context 330
8416 -#define __NR_syscalls 319
8417 +#define __NR_syscalls 331
8419 #define __NR__exit __NR_exit
8420 #define NR_syscalls __NR_syscalls
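With the syscall table entries and the numbers above, user space reaches the interface through syscall(2). A hedged probe (the argument layout of pfm_create_context is not shown in this hunk, so only availability is checked):

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_pfm_create_context
    #define __NR_pfm_create_context 319      /* powerpc value from the hunk above */
    #endif

    int main(void)
    {
            long ret = syscall(__NR_pfm_create_context, 0L, NULL, NULL, 0L);

            if (ret < 0 && errno == ENOSYS)
                    puts("perfmon2 syscalls are not wired into this kernel");
            else
                    puts("sys_pfm_create_context reached (call may still fail with EINVAL/EFAULT)");
            return 0;
    }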
8421 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
8422 index 1cbbf70..198645f 100644
8423 --- a/arch/powerpc/kernel/entry_32.S
8424 +++ b/arch/powerpc/kernel/entry_32.S
8426 * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
8428 #if MSR_KERNEL >= 0x10000
8429 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
8430 +#define LOAD_MSR_KERNEL(r, x) lis r,(x)@ha; ori r,r,(x)@l
8432 #define LOAD_MSR_KERNEL(r, x) li r,(x)
8434 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
8435 index 2d802e9..77a090d 100644
8436 --- a/arch/powerpc/kernel/entry_64.S
8437 +++ b/arch/powerpc/kernel/entry_64.S
8438 @@ -643,6 +643,10 @@ user_work:
8439 b .ret_from_except_lite
8442 +#ifdef CONFIG_PERFMON
8443 + addi r3,r1,STACK_FRAME_OVERHEAD
8444 + bl .pfm_handle_work
8445 +#endif /* CONFIG_PERFMON */
8446 addi r3,r1,STACK_FRAME_OVERHEAD
8449 diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
8450 index d972dec..b255fba 100644
8451 --- a/arch/powerpc/kernel/irq.c
8452 +++ b/arch/powerpc/kernel/irq.c
8453 @@ -104,6 +104,24 @@ static inline notrace void set_soft_enabled(unsigned long enable)
8454 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
8457 +#ifdef CONFIG_PERFMON
8458 +static inline unsigned long get_pmu_except_pending(void)
8460 + unsigned long pending;
8462 + __asm__ __volatile__("lbz %0,%1(13)"
8463 + : "=r" (pending) : "i" (offsetof(struct paca_struct, pmu_except_pending)));
8468 +static inline void set_pmu_except_pending(unsigned long pending)
8470 + __asm__ __volatile__("stb %0,%1(13)"
8471 + : : "r" (pending), "i" (offsetof(struct paca_struct, pmu_except_pending)));
8473 +#endif /* CONFIG_PERFMON */
8475 notrace void raw_local_irq_restore(unsigned long en)
8478 @@ -162,6 +180,19 @@ notrace void raw_local_irq_restore(unsigned long en)
8479 lv1_get_version_info(&tmp);
8482 +#ifdef CONFIG_PERFMON
8484 + * If a PMU exception occurred while interrupts were soft-disabled,
8485 + * replay it now by forcing a new PMU exception.
8487 + if (get_pmu_except_pending()) {
8488 + set_pmu_except_pending(0);
8489 + /* Make sure we trigger the edge detection circuitry */
8490 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
8491 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
8493 +#endif /* CONFIG_PERFMON */
8495 __hard_irq_enable();
8497 EXPORT_SYMBOL(raw_local_irq_restore);
8498 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
8499 index 957bded..32dbc8e 100644
8500 --- a/arch/powerpc/kernel/process.c
8501 +++ b/arch/powerpc/kernel/process.c
8503 #include <linux/mqueue.h>
8504 #include <linux/hardirq.h>
8505 #include <linux/utsname.h>
8506 +#include <linux/perfmon_kern.h>
8508 #include <asm/pgtable.h>
8509 #include <asm/uaccess.h>
8510 @@ -393,9 +394,14 @@ struct task_struct *__switch_to(struct task_struct *prev,
8511 new_thread->start_tb = current_tb;
8515 local_irq_save(flags);
8517 + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW))
8518 + pfm_ctxsw_out(prev, new);
8520 + if (test_tsk_thread_flag(new, TIF_PERFMON_CTXSW))
8521 + pfm_ctxsw_in(prev, new);
8523 account_system_vtime(current);
8524 account_process_vtime(current);
8525 calculate_steal_time();
8526 @@ -544,6 +550,7 @@ void show_regs(struct pt_regs * regs)
8527 void exit_thread(void)
8529 discard_lazy_cpu_state();
8530 + pfm_exit_thread();
8533 void flush_thread(void)
8534 @@ -669,6 +676,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
8536 kregs->nip = (unsigned long)ret_from_fork;
8538 + pfm_copy_thread(p);
8542 diff --git a/arch/powerpc/perfmon/Kconfig b/arch/powerpc/perfmon/Kconfig
8543 new file mode 100644
8544 index 0000000..3f4bbf2
8546 +++ b/arch/powerpc/perfmon/Kconfig
8548 +menu "Hardware Performance Monitoring support"
8550 + bool "Perfmon2 performance monitoring interface"
8553 + Enables the perfmon2 interface to access the hardware
8554 + performance counters. See <http://perfmon2.sf.net/> for
8557 +config PERFMON_DEBUG
8558 + bool "Perfmon debugging"
8560 + depends on PERFMON
8562 + Enables perfmon debugging support
8564 +config PERFMON_DEBUG_FS
8565 + bool "Enable perfmon statistics reporting via debugfs"
8567 + depends on PERFMON && DEBUG_FS
8569 + Enable collection and reporting of perfmon timing statistics under
8570 + debugfs. This is used for debugging and performance analysis of the
8571 + subsystem. The debugfs filesystem must be mounted.
8573 +config PERFMON_POWER4
8574 + tristate "Support for Power4 hardware performance counters"
8575 + depends on PERFMON && PPC64
8578 + Enables support for the Power 4 hardware performance counters
8581 +config PERFMON_POWER5
8582 + tristate "Support for Power5 hardware performance counters"
8583 + depends on PERFMON && PPC64
8586 + Enables support for the Power 5 hardware performance counters
8589 +config PERFMON_POWER6
8590 + tristate "Support for Power6 hardware performance counters"
8591 + depends on PERFMON && PPC64
8594 + Enables support for the Power 6 hardware performance counters
8597 +config PERFMON_PPC32
8598 + tristate "Support for PPC32 hardware performance counters"
8599 + depends on PERFMON && PPC32
8602 + Enables support for the PPC32 hardware performance counters
8605 +config PERFMON_CELL
8606 + tristate "Support for Cell hardware performance counters"
8607 + depends on PERFMON && PPC_CELL
8608 + select PS3_LPM if PPC_PS3
8611 + Enables support for the Cell hardware performance counters.
8615 diff --git a/arch/powerpc/perfmon/Makefile b/arch/powerpc/perfmon/Makefile
8616 new file mode 100644
8617 index 0000000..300661f
8619 +++ b/arch/powerpc/perfmon/Makefile
8621 +obj-$(CONFIG_PERFMON) += perfmon.o
8622 +obj-$(CONFIG_PERFMON_POWER4) += perfmon_power4.o
8623 +obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o
8624 +obj-$(CONFIG_PERFMON_POWER6) += perfmon_power6.o
8625 +obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o
8626 +obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o
8627 diff --git a/arch/powerpc/perfmon/perfmon.c b/arch/powerpc/perfmon/perfmon.c
8628 new file mode 100644
8629 index 0000000..51a8b6a
8631 +++ b/arch/powerpc/perfmon/perfmon.c
8634 + * This file implements the powerpc specific
8635 + * support for the perfmon2 interface
8637 + * Copyright (c) 2005 David Gibson, IBM Corporation.
8639 + * based on versions for other architectures:
8640 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
8641 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
8643 + * This program is free software; you can redistribute it and/or
8644 + * modify it under the terms of version 2 of the GNU General Public
8645 + * License as published by the Free Software Foundation.
8647 + * This program is distributed in the hope that it will be useful,
8648 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8649 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8650 + * General Public License for more details.
8652 + * You should have received a copy of the GNU General Public License
8653 + * along with this program; if not, write to the Free Software
8654 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
8657 +#include <linux/interrupt.h>
8658 +#include <linux/perfmon_kern.h>
8660 +static void pfm_stop_active(struct task_struct *task,
8661 + struct pfm_context *ctx, struct pfm_event_set *set)
8663 + struct pfm_arch_pmu_info *arch_info;
8665 + arch_info = pfm_pmu_info();
8666 + BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds);
8668 + arch_info->disable_counters(ctx, set);
8670 + if (set->npend_ovfls)
8673 + arch_info->get_ovfl_pmds(ctx, set);
8677 + * Called from pfm_save_pmds(). Interrupts are masked. Registers are
8678 + * already saved away.
8680 +void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
8681 + struct pfm_event_set *set)
8684 + u64 *used_pmds, *intr_pmds;
8686 + num = set->nused_pmds;
8687 + used_pmds = set->used_pmds;
8688 + intr_pmds = ctx->regs.intr_pmds;
8690 + for (i = 0; num; i++)
8691 + if (likely(test_bit(i, used_pmds))) {
8692 + if (likely(test_bit(i, intr_pmds)))
8693 + pfm_write_pmd(ctx, i, 0);
8699 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
8700 + * Context is locked. Interrupts are masked. Monitoring is active.
8701 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
8704 + * must stop monitoring for the task
8706 + * non-zero : did not save PMDs (as part of stopping the PMU)
8707 + * 0 : saved PMDs (no need to save them in caller)
8709 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
8711 + struct pfm_arch_pmu_info *arch_info;
8713 + arch_info = pfm_pmu_info();
8715 + * disable lazy restore of the PMC/PMD registers.
8717 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
8719 + if (ctx->state == PFM_CTX_MASKED)
8722 + pfm_stop_active(task, ctx, ctx->active_set);
8724 + if (arch_info->ctxswout_thread)
8725 + arch_info->ctxswout_thread(task, ctx, ctx->active_set);
8727 + return pfm_arch_is_active(ctx);
8731 + * Called from pfm_ctxsw
8733 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
8735 + struct pfm_arch_pmu_info *arch_info;
8737 + arch_info = pfm_pmu_info();
8738 + if (ctx->state != PFM_CTX_MASKED && ctx->flags.started == 1) {
8739 + BUG_ON(!arch_info->enable_counters);
8740 + arch_info->enable_counters(ctx, ctx->active_set);
8743 + if (arch_info->ctxswin_thread)
8744 + arch_info->ctxswin_thread(task, ctx, ctx->active_set);
8748 + * Called from pfm_stop() and idle notifier
8750 + * Interrupts are masked. Context is locked. Set is the active set.
8753 + * task is not necessarily current. If not current task, then
8754 + * task is guaranteed stopped and off any cpu. Access to PMU
8755 + * is not guaranteed. Interrupts are masked. Context is locked.
8756 + * Set is the active set.
8758 + * For system-wide:
8761 + * must disable active monitoring. ctx cannot be NULL
8763 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
8766 + * no need to go through stop_save()
8767 + * if we are already stopped
8769 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
8773 + * stop live registers and collect pending overflow
8775 + if (task == current)
8776 + pfm_stop_active(task, ctx, ctx->active_set);
8780 + * Enable active monitoring. Called from pfm_start() and
8781 + * pfm_arch_unmask_monitoring().
8783 + * Interrupts are masked. Context is locked. Set is the active set.
8786 + * Task is not necessarily current. If not current task, then task
8787 + * is guaranteed stopped and off any cpu. No access to PMU if task
8790 + * For system-wide:
8791 + * Task is always current
8793 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
8795 + struct pfm_arch_pmu_info *arch_info;
8797 + arch_info = pfm_pmu_info();
8798 + if (task != current)
8801 + BUG_ON(!arch_info->enable_counters);
8803 + arch_info->enable_counters(ctx, ctx->active_set);
8807 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
8808 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
8809 + * context is locked. Interrupts are masked. set cannot be NULL.
8810 + * Access to the PMU is guaranteed.
8812 + * function must restore all PMD registers from set.
8814 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
8816 + struct pfm_arch_pmu_info *arch_info;
8820 + arch_info = pfm_pmu_info();
8822 + /* The model-specific module can override the default
8823 + * restore-PMD method.
8825 + if (arch_info->restore_pmds)
8826 + return arch_info->restore_pmds(ctx, set);
8828 + num = set->nused_pmds;
8829 + used_pmds = set->used_pmds;
8831 + for (i = 0; num; i++) {
8832 + if (likely(test_bit(i, used_pmds))) {
8833 + pfm_write_pmd(ctx, i, set->pmds[i].value);
8840 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
8841 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
8842 + * context is locked. Interrupts are masked. set cannot be NULL.
8843 + * Access to the PMU is guaranteed.
8845 + * function must restore all PMC registers from set, if needed.
8847 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
8849 + struct pfm_arch_pmu_info *arch_info;
8851 + unsigned int i, max_pmc, reg;
8853 + arch_info = pfm_pmu_info();
8854 + /* The model-specific module can override the default
8855 + * restore-PMC method.
8857 + if (arch_info->restore_pmcs)
8858 + return arch_info->restore_pmcs(ctx, set);
8860 + /* The "common" powerpc models enable the counters simply by writing
8861 + * all the control registers. Therefore, if we're masked or stopped we
8862 + * don't need to bother restoring the PMCs now.
8864 + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
8867 + max_pmc = ctx->regs.max_pmc;
8868 + impl_pmcs = ctx->regs.pmcs;
8871 + * Restore all pmcs in reverse order to ensure the counters aren't
8872 + * enabled before their event selectors are set correctly.
8874 + reg = max_pmc - 1;
8875 + for (i = 0; i < max_pmc; i++) {
8876 + if (test_bit(reg, impl_pmcs))
8877 + pfm_arch_write_pmc(ctx, reg, set->pmcs[reg]);
8882 +char *pfm_arch_get_pmu_module_name(void)
8884 + unsigned int pvr = mfspr(SPRN_PVR);
8886 + switch (PVR_VER(pvr)) {
8887 + case 0x0004: /* 604 */
8888 + case 0x0009: /* 604e; */
8889 + case 0x000A: /* 604ev */
8890 + case 0x0008: /* 750/740 */
8891 + case 0x7000: /* 750FX */
8893 + case 0x7002: /* 750GX */
8894 + case 0x000C: /* 7400 */
8895 + case 0x800C: /* 7410 */
8896 + case 0x8000: /* 7451/7441 */
8897 + case 0x8001: /* 7455/7445 */
8898 + case 0x8002: /* 7457/7447 */
8899 + case 0x8003: /* 7447A */
8900 + case 0x8004: /* 7448 */
8901 + return "perfmon_ppc32";
8904 + return "perfmon_power4";
8906 + return "perfmon_power5";
8908 + if (PVR_REV(pvr) < 0x300)
8909 + /* PMU behaves like POWER5 */
8910 + return "perfmon_power5";
8912 + /* PMU behaves like POWER6 */
8913 + return "perfmon_power6";
8915 + return "perfmon_power6";
8919 + return "perfmon_ppc970";
8921 + return "perfmon_cell";
8926 +void pfm_arch_init_percpu(void)
8928 +#ifdef CONFIG_PPC64
8929 + extern void ppc64_enable_pmcs(void);
8930 + ppc64_enable_pmcs();
8935 + * powerpc_irq_handler
8937 + * Get the perfmon context that belongs to the current CPU, and call the
8938 + * model-specific interrupt handler.
8940 +void powerpc_irq_handler(struct pt_regs *regs)
8942 + struct pfm_arch_pmu_info *arch_info;
8943 + struct pfm_context *ctx;
8945 + if (!regs->softe) {
8947 + * We got a PMU interrupt while interrupts were soft
8948 + * disabled. Disable hardware interrupts by clearing
8949 + * MSR_EE and also clear PMAO because we will need to set
8950 + * that again later when interrupts are re-enabled and
8951 + * raw_local_irq_restore() sees that the pmu_except_pending
8954 + regs->msr &= ~MSR_EE;
8955 + get_paca()->pmu_except_pending = 1;
8956 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
8960 + arch_info = pfm_pmu_info();
8961 + if (arch_info->irq_handler) {
8962 + ctx = __get_cpu_var(pmu_ctx);
8964 + arch_info->irq_handler(regs, ctx);
8967 diff --git a/arch/powerpc/perfmon/perfmon_cell.c b/arch/powerpc/perfmon/perfmon_cell.c
8968 new file mode 100644
8969 index 0000000..e1ae12c
8971 +++ b/arch/powerpc/perfmon/perfmon_cell.c
8974 + * This file contains the Cell PMU register description tables
8975 + * and pmc checker used by perfmon.c.
8977 + * Copyright IBM Corporation 2007
8978 + * (C) Copyright 2007 TOSHIBA CORPORATION
8980 + * Based on other Perfmon2 PMU modules.
8981 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
8982 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
8984 + * This program is free software; you can redistribute it and/or
8985 + * modify it under the terms of version 2 of the GNU General Public
8986 + * License as published by the Free Software Foundation.
8988 + * This program is distributed in the hope that it will be useful,
8989 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8990 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8991 + * General Public License for more details.
8993 + * You should have received a copy of the GNU General Public License
8994 + * along with this program; if not, write to the Free Software
8995 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
8999 +#include <linux/module.h>
9000 +#include <linux/perfmon_kern.h>
9001 +#include <linux/io.h>
9002 +#include <asm/cell-pmu.h>
9003 +#include <asm/cell-regs.h>
9004 +#include <asm/machdep.h>
9005 +#include <asm/rtas.h>
9006 +#include <asm/ps3.h>
9007 +#include <asm/spu.h>
9009 +MODULE_AUTHOR("Kevin Corry <kevcorry@us.ibm.com>, "
9010 + "Carl Love <carll@us.ibm.com>");
9011 +MODULE_DESCRIPTION("Cell PMU description table");
9012 +MODULE_LICENSE("GPL");
9014 +struct pfm_cell_platform_pmu_info {
9015 + u32 (*read_ctr)(u32 cpu, u32 ctr);
9016 + void (*write_ctr)(u32 cpu, u32 ctr, u32 val);
9017 + void (*write_pm07_control)(u32 cpu, u32 ctr, u32 val);
9018 + void (*write_pm)(u32 cpu, enum pm_reg_name reg, u32 val);
9019 + void (*enable_pm)(u32 cpu);
9020 + void (*disable_pm)(u32 cpu);
9021 + void (*enable_pm_interrupts)(u32 cpu, u32 thread, u32 mask);
9022 + u32 (*get_and_clear_pm_interrupts)(u32 cpu);
9023 + u32 (*get_hw_thread_id)(int cpu);
9024 + struct cbe_ppe_priv_regs __iomem *(*get_cpu_ppe_priv_regs)(int cpu);
9025 + struct cbe_pmd_regs __iomem *(*get_cpu_pmd_regs)(int cpu);
9026 + struct cbe_mic_tm_regs __iomem *(*get_cpu_mic_tm_regs)(int cpu);
9027 + int (*rtas_token)(const char *service);
9028 + int (*rtas_call)(int token, int param1, int param2, int *param3, ...);
9032 + * Mapping from Perfmon logical control registers to Cell hardware registers.
9034 +static struct pfm_regmap_desc pfm_cell_pmc_desc[] = {
9035 + /* Per-counter control registers. */
9036 + PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0),
9037 + PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0),
9038 + PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0),
9039 + PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0),
9040 + PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0),
9041 + PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0),
9042 + PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0),
9043 + PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0),
9045 + /* Per-counter RTAS arguments. Each of these registers has three fields.
9046 + * bits 63-48: debug-bus word
9047 + * bits 47-32: sub-unit
9048 + * bits 31-0 : full signal number
9049 + * (MSB = 63, LSB = 0)
9051 + PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0),
9052 + PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0),
9053 + PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0),
9054 + PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0),
9055 + PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0),
9056 + PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0),
9057 + PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0),
9058 + PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0),
9060 + /* Global control registers. Same order as enum pm_reg_name. */
9061 + PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0),
9062 + PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0),
9063 + PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0),
9064 + PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0),
9065 + PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0),
9066 + /* set the interrupt overflow bit for the four 32 bit counters
9067 + * that are currently supported. Will need to fix when 32 and 16
9068 + * bit counters are supported.
9070 + PMC_D(PFM_REG_I, "pm_control", 0xF0000000, 0xF0000000, 0, 0),
9071 + PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* FIX: Does user-space also need read access to this one? */
9072 + PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0),
9074 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc)
9076 +#define CELL_PMC_GROUP_CONTROL 16
9077 +#define CELL_PMC_PM_STATUS 20
9078 +#define CELL_PMC_PM_CONTROL 21
9079 +#define CELL_PMC_PM_CONTROL_CNTR_MASK 0x01E00000UL
9080 +#define CELL_PMC_PM_CONTROL_CNTR_16 0x01E00000UL
9083 + * Mapping from Perfmon logical data counters to Cell hardware counters.
9085 +static struct pfm_regmap_desc pfm_cell_pmd_desc[] = {
9086 + PMD_D(PFM_REG_C, "pm0", 0),
9087 + PMD_D(PFM_REG_C, "pm1", 0),
9088 + PMD_D(PFM_REG_C, "pm2", 0),
9089 + PMD_D(PFM_REG_C, "pm3", 0),
9090 + PMD_D(PFM_REG_C, "pm4", 0),
9091 + PMD_D(PFM_REG_C, "pm5", 0),
9092 + PMD_D(PFM_REG_C, "pm6", 0),
9093 + PMD_D(PFM_REG_C, "pm7", 0),
9095 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc)
9097 +#define PFM_EVENT_PMC_BUS_WORD(x) (((x) >> 48) & 0x00ff)
9098 +#define PFM_EVENT_PMC_FULL_SIGNAL_NUMBER(x) ((x) & 0xffffffff)
9099 +#define PFM_EVENT_PMC_SIGNAL_GROUP(x) (((x) & 0xffffffff) / 100)
9100 +#define PFM_PM_CTR_INPUT_MUX_BIT(pm07_control) (((pm07_control) >> 26) & 0x1f)
9101 +#define PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(pm07_control) ((pm07_control) >> 31)
9102 +#define PFM_GROUP_CONTROL_GROUP0_WORD(grp_ctrl) ((grp_ctrl) >> 30)
9103 +#define PFM_GROUP_CONTROL_GROUP1_WORD(grp_ctrl) (((grp_ctrl) >> 28) & 0x3)
9104 +#define PFM_NUM_OF_GROUPS 2
9105 +#define PFM_PPU_IU1_THREAD1_BASE_BIT 19
9106 +#define PFM_PPU_XU_THREAD1_BASE_BIT 16
9107 +#define PFM_COUNTER_CTRL_PMC_PPU_TH0 0x100000000ULL
9108 +#define PFM_COUNTER_CTRL_PMC_PPU_TH1 0x200000000ULL
9111 + * Debug-bus signal handling.
9113 + * Some Cell systems have firmware that can handle the debug-bus signal
9114 + * routing. For systems without this firmware, we have a minimal in-kernel
9115 + * implementation as well.
9118 +/* The firmware only sees physical CPUs, so divide by 2 if SMT is on. */
9119 +#ifdef CONFIG_SCHED_SMT
9120 +#define RTAS_CPU(cpu) ((cpu) / 2)
9122 +#define RTAS_CPU(cpu) (cpu)
9124 +#define RTAS_BUS_WORD(x) (u16)(((x) >> 48) & 0x0000ffff)
9125 +#define RTAS_SUB_UNIT(x) (u16)(((x) >> 32) & 0x0000ffff)
9126 +#define RTAS_SIGNAL_NUMBER(x) (s32)( (x) & 0xffffffff)
9127 +#define RTAS_SIGNAL_GROUP(x) (RTAS_SIGNAL_NUMBER(x) / 100)
9129 +#define subfunc_RESET 1
9130 +#define subfunc_ACTIVATE 2
9132 +#define passthru_ENABLE 1
9133 +#define passthru_DISABLE 2
9136 + * struct cell_rtas_arg
9138 + * @cpu: Processor to modify. Linux numbers CPUs based on SMT IDs, but the
9139 + * firmware only sees the physical CPUs. So this value should be the
9140 + * SMT ID (from smp_processor_id() or get_cpu()) divided by 2.
9141 + * @sub_unit: Hardware subunit this applies to (if applicable).
9142 + * @signal_group: Signal group to enable/disable on the trace bus.
9143 + * @bus_word: For signal groups that propagate via the trace bus, this trace
9144 + * bus word will be used. This is a mask of (1 << TraceBusWord).
9145 + * For other signal groups, this specifies the trigger or event bus.
9146 + * @bit: Trigger/Event bit, if applicable for the signal group.
9148 + * An array of these structures is passed to rtas_call() to set up the
9149 + * signals on the debug bus.
9151 +struct cell_rtas_arg {
9160 + * rtas_reset_signals
9162 + * Use the firmware RTAS call to disable signal pass-thru and to reset the
9163 + * debug-bus signals.
9165 +static int rtas_reset_signals(u32 cpu)
9167 + struct cell_rtas_arg signal;
9168 + u64 real_addr = virt_to_phys(&signal);
9170 + struct pfm_cell_platform_pmu_info *info =
9171 + ((struct pfm_arch_pmu_info *)
9172 + (pfm_pmu_conf->pmu_info))->platform_info;
9174 + memset(&signal, 0, sizeof(signal));
9175 + signal.cpu = RTAS_CPU(cpu);
9176 + rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"),
9181 + real_addr & 0xffffffff,
9188 + * rtas_activate_signals
9190 + * Use the firmware RTAS call to enable signal pass-thru and to activate the
9191 + * desired signal groups on the debug-bus.
9193 +static int rtas_activate_signals(struct cell_rtas_arg *signals,
9196 + u64 real_addr = virt_to_phys(signals);
9198 + struct pfm_cell_platform_pmu_info *info =
9199 + ((struct pfm_arch_pmu_info *)
9200 + (pfm_pmu_conf->pmu_info))->platform_info;
9202 + rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"),
9207 + real_addr & 0xffffffff,
9208 + num_signals * sizeof(*signals));
9213 +#define HID1_RESET_MASK (~0x00000001ffffffffUL)
9214 +#define PPU_IU1_WORD0_HID1_EN_MASK (~0x00000001f0c0802cUL)
9215 +#define PPU_IU1_WORD0_HID1_EN_WORD ( 0x00000001f0400000UL)
9216 +#define PPU_IU1_WORD1_HID1_EN_MASK (~0x000000010fc08023UL)
9217 +#define PPU_IU1_WORD1_HID1_EN_WORD ( 0x000000010f400001UL)
9218 +#define PPU_XU_WORD0_HID1_EN_MASK (~0x00000001f038402cUL)
9219 +#define PPU_XU_WORD0_HID1_EN_WORD ( 0x00000001f0080008UL)
9220 +#define PPU_XU_WORD1_HID1_EN_MASK (~0x000000010f074023UL)
9221 +#define PPU_XU_WORD1_HID1_EN_WORD ( 0x000000010f030002UL)
9223 +/* The bus_word field in the cell_rtas_arg structure is a bit-mask
9224 + * indicating which debug-bus word(s) to use.
9233 +/* Definitions of the signal-groups that the built-in signal-activation
9234 + * code can handle.
9237 + SIG_GROUP_NONE = 0,
9239 + /* 2.x PowerPC Processor Unit (PPU) Signal Groups */
9240 + SIG_GROUP_PPU_BASE = 20,
9241 + SIG_GROUP_PPU_IU1 = 21,
9242 + SIG_GROUP_PPU_XU = 22,
9244 + /* 3.x PowerPC Storage Subsystem (PPSS) Signal Groups */
9245 + SIG_GROUP_PPSS_BASE = 30,
9247 + /* 4.x Synergistic Processor Unit (SPU) Signal Groups */
9248 + SIG_GROUP_SPU_BASE = 40,
9250 + /* 5.x Memory Flow Controller (MFC) Signal Groups */
9251 + SIG_GROUP_MFC_BASE = 50,
9253 +	/* 6.x Element Interconnect Bus (EIB) Signal Groups */
9254 + SIG_GROUP_EIB_BASE = 60,
9256 + /* 7.x Memory Interface Controller (MIC) Signal Groups */
9257 + SIG_GROUP_MIC_BASE = 70,
9259 + /* 8.x Cell Broadband Engine Interface (BEI) Signal Groups */
9260 + SIG_GROUP_BEI_BASE = 80,
9266 + * Read-modify-write for a special-purpose-register.
9268 +#define rmw_spr(spr_id, a_mask, o_mask) \
9270 + u64 value = mfspr(spr_id); \
9271 + value &= (u64)(a_mask); \
9272 + value |= (u64)(o_mask); \
9273 + mtspr((spr_id), value); \
9279 + * Read-modify-write for a 64-bit MMIO register.
9281 +#define rmw_mmio_reg64(mem, a_mask, o_mask) \
9283 + u64 value = in_be64(&(mem)); \
9284 + value &= (u64)(a_mask); \
9285 + value |= (u64)(o_mask); \
9286 + out_be64(&(mem), value); \
9292 + * Set or unset a specified bit within a 64-bit MMIO register.
9294 +#define rmwb_mmio_reg64(mem, bit_num, set_bit) \
9295 + rmw_mmio_reg64((mem), ~(1UL << (63 - (bit_num))), \
9296 + ((set_bit) << (63 - (bit_num))))
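As a standalone sketch (plain variables instead of SPRs or MMIO, names invented for the example), the read-modify-write pattern behind these macros is: read the register, AND with a mask that clears the field of interest, OR in the new bits, and write the result back.

#include <stdint.h>

/* Mirrors rmw_spr()/rmw_mmio_reg64(): and_mask clears the field,
 * or_mask supplies the new bits. */
uint64_t rmw64(uint64_t old, uint64_t and_mask, uint64_t or_mask)
{
	return (old & and_mask) | or_mask;
}

/* Mirrors rmwb_mmio_reg64(): set or clear a single bit, with bit 0
 * being the most-significant bit (IBM bit numbering). */
uint64_t rmw_bit64(uint64_t old, int bit_num, uint64_t set_bit)
{
	return rmw64(old, ~(1ULL << (63 - bit_num)), set_bit << (63 - bit_num));
}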
9301 + * Enable or disable passthru mode in all the Cell signal islands.
9303 +static int passthru(u32 cpu, u64 enable)
9305 + struct cbe_ppe_priv_regs __iomem *ppe_priv_regs;
9306 + struct cbe_pmd_regs __iomem *pmd_regs;
9307 + struct cbe_mic_tm_regs __iomem *mic_tm_regs;
9308 + struct pfm_cell_platform_pmu_info *info =
9309 + ((struct pfm_arch_pmu_info *)
9310 + (pfm_pmu_conf->pmu_info))->platform_info;
9312 + ppe_priv_regs = info->get_cpu_ppe_priv_regs(cpu);
9313 + pmd_regs = info->get_cpu_pmd_regs(cpu);
9314 + mic_tm_regs = info->get_cpu_mic_tm_regs(cpu);
9316 + if (!ppe_priv_regs || !pmd_regs || !mic_tm_regs) {
9317 + PFM_ERR("Error getting Cell PPE, PMD, and MIC "
9318 + "register maps: 0x%p, 0x%p, 0x%p",
9319 + ppe_priv_regs, pmd_regs, mic_tm_regs);
9323 + rmwb_mmio_reg64(ppe_priv_regs->L2_debug1, 61, enable);
9324 + rmwb_mmio_reg64(ppe_priv_regs->ciu_dr1, 5, enable);
9325 + rmwb_mmio_reg64(pmd_regs->on_ramp_trace, 39, enable);
9326 + rmwb_mmio_reg64(mic_tm_regs->MBL_debug, 20, enable);
9331 +#define passthru_enable(cpu) passthru(cpu, 1)
9332 +#define passthru_disable(cpu) passthru(cpu, 0)
9334 +static inline void reset_signal_registers(u32 cpu)
9336 + rmw_spr(SPRN_HID1, HID1_RESET_MASK, 0);
9340 + * celleb_reset_signals
9342 + * Non-rtas version of resetting the debug-bus signals.
9344 +static int celleb_reset_signals(u32 cpu)
9347 + rc = passthru_disable(cpu);
9349 + reset_signal_registers(cpu);
9356 + * Write the HID1 register to connect the specified PPU signal-group to the
9359 +static int ppu_selection(struct cell_rtas_arg *signal)
9361 + u64 hid1_enable_word = 0;
9362 + u64 hid1_enable_mask = 0;
9364 + switch (signal->signal_group) {
9366 + case SIG_GROUP_PPU_IU1: /* 2.1 PPU Instruction Unit - Group 1 */
9367 + switch (signal->bus_word) {
9369 + hid1_enable_mask = PPU_IU1_WORD0_HID1_EN_MASK;
9370 + hid1_enable_word = PPU_IU1_WORD0_HID1_EN_WORD;
9373 + hid1_enable_mask = PPU_IU1_WORD1_HID1_EN_MASK;
9374 + hid1_enable_word = PPU_IU1_WORD1_HID1_EN_WORD;
9377 + PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.",
9378 + signal->bus_word, signal->signal_group);
9383 + case SIG_GROUP_PPU_XU: /* 2.2 PPU Execution Unit */
9384 + switch (signal->bus_word) {
9386 + hid1_enable_mask = PPU_XU_WORD0_HID1_EN_MASK;
9387 + hid1_enable_word = PPU_XU_WORD0_HID1_EN_WORD;
9390 + hid1_enable_mask = PPU_XU_WORD1_HID1_EN_MASK;
9391 + hid1_enable_word = PPU_XU_WORD1_HID1_EN_WORD;
9394 + PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.",
9395 + signal->bus_word, signal->signal_group);
9401 + PFM_ERR("Signal-group %d not implemented.",
9402 + signal->signal_group);
9406 + rmw_spr(SPRN_HID1, hid1_enable_mask, hid1_enable_word);
9412 + * celleb_activate_signals
9414 + * Non-rtas version of activating the debug-bus signals.
9416 +static int celleb_activate_signals(struct cell_rtas_arg *signals,
9419 + int i, rc = -EINVAL;
9421 + for (i = 0; i < num_signals; i++) {
9422 + switch (signals[i].signal_group) {
9424 + /* 2.x PowerPC Processor Unit (PPU) Signal Selection */
9425 + case SIG_GROUP_PPU_IU1:
9426 + case SIG_GROUP_PPU_XU:
9427 + rc = ppu_selection(signals + i);
9433 + PFM_ERR("Signal-group %d not implemented.",
9434 + signals[i].signal_group);
9440 + rc = passthru_enable(signals[0].cpu);
9446 + * ps3_reset_signals
9448 + * ps3 version of resetting the debug-bus signals.
9450 +static int ps3_reset_signals(u32 cpu)
9452 +#ifdef CONFIG_PPC_PS3
9453 + return ps3_set_signal(0, 0, 0, 0);
9460 + * ps3_activate_signals
9462 + * ps3 version of activating the debug-bus signals.
9464 +static int ps3_activate_signals(struct cell_rtas_arg *signals,
9467 +#ifdef CONFIG_PPC_PS3
9470 + for (i = 0; i < num_signals; i++)
9471 + ps3_set_signal(signals[i].signal_group, signals[i].bit,
9472 + signals[i].sub_unit, signals[i].bus_word);
9481 + * Call the firmware (if available) to reset the debug-bus signals.
9482 + * Otherwise call the built-in version.
9484 +int reset_signals(u32 cpu)
9488 + if (machine_is(celleb))
9489 + rc = celleb_reset_signals(cpu);
9490 + else if (machine_is(ps3))
9491 + rc = ps3_reset_signals(cpu);
9493 + rc = rtas_reset_signals(cpu);
9499 + * activate_signals
9501 + * Call the firmware (if available) to activate the debug-bus signals.
9502 + * Otherwise call the built-in version.
9504 +int activate_signals(struct cell_rtas_arg *signals, int num_signals)
9508 + if (machine_is(celleb))
9509 + rc = celleb_activate_signals(signals, num_signals);
9510 + else if (machine_is(ps3))
9511 + rc = ps3_activate_signals(signals, num_signals);
9513 + rc = rtas_activate_signals(signals, num_signals);
9519 + * pfm_cell_pmc_check
9521 + * Verify that we are going to write a valid value to the specified PMC.
9523 +int pfm_cell_pmc_check(struct pfm_context *ctx,
9524 + struct pfm_event_set *set,
9525 + struct pfarg_pmc *req)
9527 + u16 cnum, reg_num = req->reg_num;
9528 + s16 signal_group = RTAS_SIGNAL_GROUP(req->reg_value);
9529 + u8 bus_word = RTAS_BUS_WORD(req->reg_value);
9531 + if (reg_num < NR_CTRS || reg_num >= (NR_CTRS * 2))
9534 + switch (signal_group) {
9535 + case SIG_GROUP_PPU_IU1:
9536 + case SIG_GROUP_PPU_XU:
9537 + if ((bus_word != 0) && (bus_word != 1)) {
9538 + PFM_ERR("Invalid bus word (%d) for signal-group %d",
9539 + bus_word, signal_group);
9544 + PFM_ERR("Signal-group %d not implemented.", signal_group);
9548 + for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) {
9549 + if (test_bit(cnum, cast_ulp(set->used_pmcs)) &&
9550 + bus_word == RTAS_BUS_WORD(set->pmcs[cnum]) &&
9551 + signal_group != RTAS_SIGNAL_GROUP(set->pmcs[cnum])) {
9552 + PFM_ERR("Impossible signal-group combination: "
9553 + "(%u,%u,%d) (%u,%u,%d)",
9554 + reg_num, bus_word, signal_group, cnum,
9555 + RTAS_BUS_WORD(set->pmcs[cnum]),
9556 + RTAS_SIGNAL_GROUP(set->pmcs[cnum]));
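To make the constraint concrete: one debug-bus word can carry only one signal group at a time, so two event PMCs that select the same bus word must select the same group. A small user-space sketch with made-up event values (helper macros invented for the example):

#include <stdint.h>
#include <stdio.h>

#define EX_BUS_WORD(x)     ((uint16_t)(((x) >> 48) & 0xffff))
#define EX_SIGNAL_GROUP(x) ((int32_t)((x) & 0xffffffff) / 100)

int main(void)
{
	/* Both events request debug-bus word 0, but one asks for group 21
	 * (PPU IU1) and the other for group 22 (PPU XU); the check above
	 * rejects such a combination. */
	uint64_t pm0_event = ((uint64_t)0 << 48) | 2119; /* word 0, group 21 */
	uint64_t pm1_event = ((uint64_t)0 << 48) | 2203; /* word 0, group 22 */

	if (EX_BUS_WORD(pm0_event) == EX_BUS_WORD(pm1_event) &&
	    EX_SIGNAL_GROUP(pm0_event) != EX_SIGNAL_GROUP(pm1_event))
		printf("conflict: same bus word, different signal groups\n");
	return 0;
}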
9565 + * write_pm07_event
9567 + * Pull out the RTAS arguments from the 64-bit register value and make the
9568 + * RTAS activate-signals call.
9570 +static void write_pm07_event(int cpu, unsigned int ctr, u64 value)
9572 + struct cell_rtas_arg signal;
9573 + s32 signal_number;
9576 + signal_number = RTAS_SIGNAL_NUMBER(value);
9577 + if (!signal_number) {
9578 + /* Don't include counters that are counting cycles. */
9582 + signal.cpu = RTAS_CPU(cpu);
9583 + signal.bus_word = 1 << RTAS_BUS_WORD(value);
9584 + signal.sub_unit = RTAS_SUB_UNIT(value);
9585 + signal.signal_group = signal_number / 100;
9586 + signal.bit = abs(signal_number) % 100;
9588 + rc = activate_signals(&signal, 1);
9590 + PFM_WARN("%s(%d, %u, %lu): Error calling "
9591 + "activate_signals(): %d\n", __func__,
9592 + cpu, ctr, (unsigned long)value, rc);
9593 + /* FIX: Could we change this routine to return an error? */
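The signal-number convention used above packs the group into the hundreds digits: group = number / 100 and bit = number % 100. A tiny sketch with a made-up signal number:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int signal_number = 2119;	/* hypothetical PPU IU1 signal */

	printf("group = %d\n", signal_number / 100);	  /* 21 -> SIG_GROUP_PPU_IU1 */
	printf("bit   = %d\n", abs(signal_number) % 100); /* 19 */
	return 0;
}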
9598 + * pfm_cell_probe_pmu
9600 + * Simply check the processor version register to see if we're currently
9601 + * on a Cell system.
9603 +static int pfm_cell_probe_pmu(void)
9605 + unsigned long pvr = mfspr(SPRN_PVR);
9607 + if (PVR_VER(pvr) != PV_BE)
9614 + * pfm_cell_write_pmc
9616 +static void pfm_cell_write_pmc(unsigned int cnum, u64 value)
9618 + int cpu = smp_processor_id();
9619 + struct pfm_cell_platform_pmu_info *info =
9620 + ((struct pfm_arch_pmu_info *)
9621 + (pfm_pmu_conf->pmu_info))->platform_info;
9623 + if (cnum < NR_CTRS) {
9624 + info->write_pm07_control(cpu, cnum, value);
9626 + } else if (cnum < NR_CTRS * 2) {
9627 + write_pm07_event(cpu, cnum - NR_CTRS, value);
9629 + } else if (cnum == CELL_PMC_PM_STATUS) {
9630 + /* The pm_status register must be treated separately from
9631 + * the other "global" PMCs. This call will ensure that
9632 + * the interrupts are routed to the correct CPU, as well
9633 + * as writing the desired value to the pm_status register.
9635 + info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu),
9638 + } else if (cnum < PFM_PM_NUM_PMCS) {
9639 + info->write_pm(cpu, cnum - (NR_CTRS * 2), value);
9644 + * pfm_cell_write_pmd
9646 +static void pfm_cell_write_pmd(unsigned int cnum, u64 value)
9648 + int cpu = smp_processor_id();
9649 + struct pfm_cell_platform_pmu_info *info =
9650 + ((struct pfm_arch_pmu_info *)
9651 + (pfm_pmu_conf->pmu_info))->platform_info;
9653 + if (cnum < NR_CTRS)
9654 + info->write_ctr(cpu, cnum, value);
9658 + * pfm_cell_read_pmd
9660 +static u64 pfm_cell_read_pmd(unsigned int cnum)
9662 + int cpu = smp_processor_id();
9663 + struct pfm_cell_platform_pmu_info *info =
9664 + ((struct pfm_arch_pmu_info *)
9665 + (pfm_pmu_conf->pmu_info))->platform_info;
9667 + if (cnum < NR_CTRS)
9668 + return info->read_ctr(cpu, cnum);
9674 + * pfm_cell_enable_counters
9676 + * Just need to turn on the global disable bit in pm_control.
9678 +static void pfm_cell_enable_counters(struct pfm_context *ctx,
9679 + struct pfm_event_set *set)
9681 + struct pfm_cell_platform_pmu_info *info =
9682 + ((struct pfm_arch_pmu_info *)
9683 + (pfm_pmu_conf->pmu_info))->platform_info;
9685 + info->enable_pm(smp_processor_id());
9689 + * pfm_cell_disable_counters
9691 + * Just need to turn off the global disable bit in pm_control.
9693 +static void pfm_cell_disable_counters(struct pfm_context *ctx,
9694 + struct pfm_event_set *set)
9696 + struct pfm_cell_platform_pmu_info *info =
9697 + ((struct pfm_arch_pmu_info *)
9698 + (pfm_pmu_conf->pmu_info))->platform_info;
9700 + info->disable_pm(smp_processor_id());
9701 + if (machine_is(ps3))
9702 + reset_signals(smp_processor_id());
9706 + * Return the thread id of the specified ppu signal.
9708 +static inline u32 get_target_ppu_thread_id(u32 group, u32 bit)
9710 + if ((group == SIG_GROUP_PPU_IU1 &&
9711 + bit < PFM_PPU_IU1_THREAD1_BASE_BIT) ||
9712 + (group == SIG_GROUP_PPU_XU &&
9713 + bit < PFM_PPU_XU_THREAD1_BASE_BIT))
9720 + * Return whether the specified counter is for a PPU signal group.
9722 +static inline int is_counter_for_ppu_sig_grp(u32 counter_control, u32 sig_grp)
9724 + if (!(counter_control & CBE_PM_CTR_INPUT_CONTROL) &&
9725 + (counter_control & CBE_PM_CTR_ENABLE) &&
9726 + ((sig_grp == SIG_GROUP_PPU_IU1) || (sig_grp == SIG_GROUP_PPU_XU)))
9733 + * Search ppu signal groups.
9735 +static int get_ppu_signal_groups(struct pfm_event_set *set,
9736 + u32 *ppu_sig_grp0, u32 *ppu_sig_grp1)
9738 + u64 pm_event, *used_pmcs = set->used_pmcs;
9740 + u32 grp0_wd, grp1_wd, wd, sig_grp;
9742 + *ppu_sig_grp0 = 0;
9743 + *ppu_sig_grp1 = 0;
9744 + grp0_wd = PFM_GROUP_CONTROL_GROUP0_WORD(
9745 + set->pmcs[CELL_PMC_GROUP_CONTROL]);
9746 + grp1_wd = PFM_GROUP_CONTROL_GROUP1_WORD(
9747 + set->pmcs[CELL_PMC_GROUP_CONTROL]);
9749 + for (i = 0, j = 0; (i < NR_CTRS) && (j < PFM_NUM_OF_GROUPS); i++) {
9750 + if (test_bit(i + NR_CTRS, used_pmcs)) {
9751 + pm_event = set->pmcs[i + NR_CTRS];
9752 + wd = PFM_EVENT_PMC_BUS_WORD(pm_event);
9753 + sig_grp = PFM_EVENT_PMC_SIGNAL_GROUP(pm_event);
9754 + if ((sig_grp == SIG_GROUP_PPU_IU1) ||
9755 + (sig_grp == SIG_GROUP_PPU_XU)) {
9757 + if (wd == grp0_wd && *ppu_sig_grp0 == 0) {
9758 + *ppu_sig_grp0 = sig_grp;
9760 + } else if (wd == grp1_wd &&
9761 + *ppu_sig_grp1 == 0) {
9762 + *ppu_sig_grp1 = sig_grp;
9772 + * pfm_cell_restore_pmcs
9774 + * Write all control register values that are saved in the specified event
9775 + * set. We could use the pfm_arch_write_pmc() function to restore each PMC
9776 + * individually (as is done in other architectures), but that results in
9777 + * multiple RTAS calls. As an optimization, we will set up the RTAS argument
9778 + * array so we can do all event-control registers in one RTAS call.
9780 + * In per-thread mode,
9781 + * the counter enable bit of the pmX_control PMC is set only while the
9782 + * target task runs on the target HW thread.
9784 +void pfm_cell_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
9787 + u64 *used_pmcs = set->used_pmcs;
9789 + int cpu = smp_processor_id();
9790 + u32 current_th_id;
9791 + struct pfm_cell_platform_pmu_info *info =
9792 + ((struct pfm_arch_pmu_info *)
9793 + (pfm_pmu_conf->pmu_info))->platform_info;
9795 + for (i = 0; i < NR_CTRS; i++) {
9796 + ctr_ctrl = set->pmcs[i];
9798 + if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH0) {
9799 + current_th_id = info->get_hw_thread_id(cpu);
9802 +			 * Clear the counter enable bit if the current
9803 +			 * HW thread is not 0.
9805 + if (current_th_id)
9806 + ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE;
9808 + } else if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH1) {
9809 + current_th_id = info->get_hw_thread_id(cpu);
9812 +			 * Clear the counter enable bit if the current HW thread is not 1.
9815 + if (!current_th_id)
9816 + ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE;
9819 + /* Write the per-counter control register. If the PMC is not
9820 + * in use, then it will simply clear the register, which will
9821 + * disable the associated counter.
9823 + info->write_pm07_control(cpu, i, ctr_ctrl);
9825 + if (test_bit(i + NR_CTRS, used_pmcs))
9826 + write_pm07_event(cpu, 0, set->pmcs[i + NR_CTRS]);
9829 +	/* Write all the global PMCs. At this point i == NR_CTRS, so i *= 2
9830 +	 * skips both banks of per-counter PMCs. Need to call pfm_cell_write_pmc()
9831 +	 * instead of cbe_write_pm() due to special handling for the pm_status register.
9833 + for (i *= 2; i < PFM_PM_NUM_PMCS; i++)
9834 + pfm_cell_write_pmc(i, set->pmcs[i]);
9838 + * pfm_cell_restore_pmds
9840 + * Write to pm_control register before writing to counter registers
9841 + * so that we can determine the counter width before writing to the counters.
9843 +void pfm_cell_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
9846 + unsigned int i, max_pmd;
9847 + int cpu = smp_processor_id();
9848 + struct pfm_cell_platform_pmu_info *info =
9849 + ((struct pfm_arch_pmu_info *)
9850 + (pfm_pmu_conf->pmu_info))->platform_info;
9853 + * Write pm_control register value
9855 + info->write_pm(cpu, pm_control,
9856 + set->pmcs[CELL_PMC_PM_CONTROL] &
9857 + ~CBE_PM_ENABLE_PERF_MON);
9858 + PFM_DBG("restore pm_control(0x%lx) before restoring pmds",
9859 + set->pmcs[CELL_PMC_PM_CONTROL]);
9861 + max_pmd = ctx->regs.max_pmd;
9862 + used_pmds = set->used_pmds;
9864 + for (i = 0; i < max_pmd; i++)
9865 + if (test_bit(i, used_pmds) &&
9866 + !(pfm_pmu_conf->pmd_desc[i].type & PFM_REG_RO))
9867 + pfm_cell_write_pmd(i, set->pmds[i].value);
9871 + * pfm_cell_get_cntr_width
9873 + * This function checks the 16-bit counter field in the pm_control PMC.
9876 + * 16 : all counters are 16 bits wide.
9877 + * 32 : all counters are 32 bits wide.
9878 + * 0 : counters of mixed widths are in use.
9880 +static int pfm_cell_get_cntr_width(struct pfm_context *ctx,
9881 + struct pfm_event_set *s)
9887 + if (ctx->flags.switch_ovfl || ctx->flags.switch_time) {
9888 + list_for_each_entry(s, &ctx->set_list, list) {
9889 + cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] &
9890 + CELL_PMC_PM_CONTROL_CNTR_MASK;
9892 + if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16)
9894 + else if (cntr_field == 0x0)
9899 + if (tmp != width && width != 0)
9905 + cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] &
9906 + CELL_PMC_PM_CONTROL_CNTR_MASK;
9908 + if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16)
9910 + else if (cntr_field == 0x0)
9919 + * pfm_cell_check_cntr_ovfl
9922 + * 1 : cntr_ovfl interrupt is used.
9923 + * 0 : cntr_ovfl interrupt is not used.
9925 +static int pfm_cell_check_cntr_ovfl(struct pfm_context *ctx,
9926 + struct pfm_event_set *s)
9928 + if (ctx->flags.switch_ovfl || ctx->flags.switch_time) {
9929 + list_for_each_entry(s, &ctx->set_list, list) {
9930 + if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS]))
9934 + if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS]))
9940 +#ifdef CONFIG_PPC_PS3
9942 + * update_sub_unit_field
9945 +static inline u64 update_sub_unit_field(u64 pm_event, u64 spe_id)
9947 + return ((pm_event & 0xFFFF0000FFFFFFFF) | (spe_id << 32));
9954 +static u64 pfm_get_spe_id(void *arg)
9956 + struct spu *spu = arg;
9959 + if (machine_is(ps3))
9960 + spe_id = ps3_get_spe_id(arg);
9962 + spe_id = spu->spe_id;
9968 + * pfm_spu_number_to_id
9971 +static int pfm_spu_number_to_id(int number, u64 *spe_id)
9976 + for (i = 0; i < MAX_NUMNODES; i++) {
9977 + if (cbe_spu_info[i].n_spus == 0)
9980 + list_for_each_entry(spu, &cbe_spu_info[i].spus, cbe_list)
9981 + if (spu->number == number) {
9982 + *spe_id = pfm_get_spe_id(spu);
9990 + * pfm_update_pmX_event_subunit_field
9992 + * In system wide mode,
9993 + * This function updates the subunit field of SPE pmX_event.
9995 +static int pfm_update_pmX_event_subunit_field(struct pfm_context *ctx)
9997 + struct pfm_event_set *set;
9998 + int i, last_pmc, ret;
9999 + u64 signal_group, spe_id;
10003 + last_pmc = NR_CTRS + 8;
10005 + list_for_each_entry(set, &ctx->set_list, list) {
10007 + used_pmcs = set->used_pmcs;
10008 + for (i = NR_CTRS; i < last_pmc; i++) {
10009 + if (!test_bit(i, used_pmcs))
10012 + signal_group = PFM_EVENT_PMC_SIGNAL_GROUP(set->pmcs[i]);
10015 +			 * If the target event is an SPE signal-group event,
10016 +			 * the sub_unit field in the pmX_event PMC is changed
10017 +			 * to the specified spe_id.
10019 + if (SIG_GROUP_SPU_BASE < signal_group &&
10020 + signal_group < SIG_GROUP_EIB_BASE) {
10021 + sub_unit = RTAS_SUB_UNIT(set->pmcs[i]);
10023 + ret = pfm_spu_number_to_id(sub_unit, &spe_id);
10027 + set->pmcs[i] = update_sub_unit_field(
10028 + set->pmcs[i], spe_id);
10037 + * pfm_cell_load_context
10039 + * In per-thread mode,
10040 + * the pmX_control PMCs which are used for PPU IU/XU events are marked with
10041 + * the thread id (PFM_COUNTER_CTRL_PMC_PPU_TH0/TH1).
10043 +static int pfm_cell_load_context(struct pfm_context *ctx)
10046 + u32 ppu_sig_grp[PFM_NUM_OF_GROUPS] = {SIG_GROUP_NONE, SIG_GROUP_NONE};
10049 + u32 target_th_id;
10050 + int ppu_sig_num = 0;
10051 + struct pfm_event_set *s;
10052 + int cntr_width = 32;
10055 + if (pfm_cell_check_cntr_ovfl(ctx, ctx->active_set)) {
10056 + cntr_width = pfm_cell_get_cntr_width(ctx, ctx->active_set);
10059 +		 * The counter overflow interrupt only works with 32-bit counters,
10060 +		 * because the perfmon core uses pfm_cell_pmu_conf.counter_width
10061 +		 * to deal with the counter overflow. We can't change the
10062 +		 * counter width here.
10064 + if (cntr_width != 32)
10068 + if (ctx->flags.system) {
10069 +#ifdef CONFIG_PPC_PS3
10070 + if (machine_is(ps3))
10071 + ret = pfm_update_pmX_event_subunit_field(ctx);
10076 + list_for_each_entry(s, &ctx->set_list, list) {
10077 + ppu_sig_num = get_ppu_signal_groups(s, &ppu_sig_grp[0],
10078 + &ppu_sig_grp[1]);
10080 + for (i = 0; i < NR_CTRS; i++) {
10081 + index = PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(s->pmcs[i]);
10082 + if (ppu_sig_num &&
10083 + (ppu_sig_grp[index] != SIG_GROUP_NONE) &&
10084 + is_counter_for_ppu_sig_grp(s->pmcs[i],
10085 + ppu_sig_grp[index])) {
10087 + bit = PFM_PM_CTR_INPUT_MUX_BIT(s->pmcs[i]);
10088 + target_th_id = get_target_ppu_thread_id(
10089 + ppu_sig_grp[index], bit);
10090 + if (!target_th_id)
10092 + PFM_COUNTER_CTRL_PMC_PPU_TH0;
10095 + PFM_COUNTER_CTRL_PMC_PPU_TH1;
10096 + PFM_DBG("set:%d mark ctr:%d target_thread:%d",
10097 + s->id, i, target_th_id);
10106 + * pfm_cell_unload_context
10108 + * For system-wide contexts and self-monitored contexts, make the RTAS call
10109 + * to reset the debug-bus signals.
10111 + * For non-self-monitored contexts, the monitored thread will already have
10112 + * been taken off the CPU and we don't need to do anything additional.
10114 +static void pfm_cell_unload_context(struct pfm_context *ctx)
10116 + if (ctx->task == current || ctx->flags.system)
10117 + reset_signals(smp_processor_id());
10121 + * pfm_cell_ctxswout_thread
10123 + * When a monitored thread is switched out (self-monitored or externally
10124 + * monitored) we need to reset the debug-bus signals so the next context that
10125 + * gets switched in can start from a clean set of signals.
10127 +int pfm_cell_ctxswout_thread(struct task_struct *task,
10128 + struct pfm_context *ctx, struct pfm_event_set *set)
10130 + reset_signals(smp_processor_id());
10135 + * pfm_cell_get_ovfl_pmds
10137 + * Determine which counters in this set have overflowed and fill in the
10138 + * set->povfl_pmds mask and set->npend_ovfls count. On Cell, the pm_status
10139 + * register contains a bit for each counter to indicate overflow. However,
10140 + * those 8 bits are in the reverse order from what Perfmon2 expects,
10141 + * so we need to reverse the order of the overflow bits.
10143 +static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx,
10144 + struct pfm_event_set *set)
10146 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
10147 + u32 pm_status, ovfl_ctrs;
10148 + u64 povfl_pmds = 0;
10150 + struct pfm_cell_platform_pmu_info *info =
10151 + ((struct pfm_arch_pmu_info *)
10152 + (pfm_pmu_conf->pmu_info))->platform_info;
10154 + if (!ctx_arch->last_read_updated)
10155 + /* This routine was not called via the interrupt handler.
10156 +		 * Need to start by reading and clearing the PM interrupts and updating
10157 + * last_read_pm_status.
10159 + ctx_arch->last_read_pm_status =
10160 + info->get_and_clear_pm_interrupts(smp_processor_id());
10162 + /* Reset the flag that the interrupt handler last read pm_status. */
10163 + ctx_arch->last_read_updated = 0;
10165 + pm_status = ctx_arch->last_read_pm_status &
10166 + set->pmcs[CELL_PMC_PM_STATUS];
10167 + ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status);
10169 + /* Reverse the order of the bits in ovfl_ctrs
10170 + * and store the result in povfl_pmds.
10172 + for (i = 0; i < PFM_PM_NUM_PMDS; i++) {
10173 + povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1);
10177 + /* Mask povfl_pmds with set->used_pmds to get set->povfl_pmds.
10178 + * Count the bits set in set->povfl_pmds to get set->npend_ovfls.
10180 + bitmap_and(set->povfl_pmds, &povfl_pmds,
10181 + set->used_pmds, PFM_PM_NUM_PMDS);
10182 + set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS);
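A standalone sketch of the bit reversal performed above (assuming the loop shifts ovfl_ctrs right by one each iteration, as the visible line suggests): the hardware reports the counters in the opposite bit order from what Perfmon2 expects, so the 8 overflow bits are mirrored.

#include <stdint.h>
#include <stdio.h>

/* Mirror the lowest num_bits of ovfl_ctrs into the result. */
uint64_t reverse_bits(uint32_t ovfl_ctrs, int num_bits)
{
	uint64_t povfl_pmds = 0;
	int i;

	for (i = 0; i < num_bits; i++) {
		povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1);
		ovfl_ctrs >>= 1;
	}
	return povfl_pmds;
}

int main(void)
{
	/* Bit 0 set on input comes out as bit 7. */
	printf("0x%02llx\n", (unsigned long long)reverse_bits(0x01, 8)); /* 0x80 */
	return 0;
}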
10186 + * pfm_cell_acquire_pmu
10188 + * Acquire the PMU resource.
10189 + * This acquisition is done when the first context is created.
10191 +int pfm_cell_acquire_pmu(u64 *unavail_pmcs, u64 *unavail_pmds)
10193 +#ifdef CONFIG_PPC_PS3
10196 + if (machine_is(ps3)) {
10198 + ret = ps3_lpm_open(PS3_LPM_TB_TYPE_INTERNAL, NULL, 0);
10200 + PFM_ERR("Can't create PS3 lpm. error:%d", ret);
10209 + * pfm_cell_release_pmu
10211 + * Release the PMU resource.
10212 + * The actual release happens when the last context is destroyed.
10214 +void pfm_cell_release_pmu(void)
10216 +#ifdef CONFIG_PPC_PS3
10217 + if (machine_is(ps3)) {
10218 + if (ps3_lpm_close())
10219 + PFM_ERR("Can't delete PS3 lpm.");
10225 + * handle_trace_buffer_interrupts
10227 + * This routine is for processing just the interval timer and trace buffer
10228 + * overflow interrupts. Performance counter interrupts are handled by the
10229 + * perf_irq_handler() routine, which reads and saves the pm_status register.
10230 + * This routine should not read the actual pm_status register, but rather
10231 + * the value passed in.
10233 +static void handle_trace_buffer_interrupts(unsigned long iip,
10234 + struct pt_regs *regs,
10235 + struct pfm_context *ctx,
10238 + /* FIX: Currently ignoring trace-buffer interrupts. */
10243 + * pfm_cell_irq_handler
10245 + * Handler for all Cell performance-monitor interrupts.
10247 +static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx)
10249 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
10250 + u32 last_read_pm_status;
10251 + int cpu = smp_processor_id();
10252 + struct pfm_cell_platform_pmu_info *info =
10253 + ((struct pfm_arch_pmu_info *)
10254 + (pfm_pmu_conf->pmu_info))->platform_info;
10256 + /* Need to disable and reenable the performance counters to get the
10257 + * desired behavior from the hardware. This is specific to the Cell
10260 + info->disable_pm(cpu);
10262 + /* Read the pm_status register to get the interrupt bits. If a
10263 +	 * performance counter overflow interrupt occurred, call the core
10264 + * perfmon interrupt handler to service the counter overflow. If the
10265 + * interrupt was for the interval timer or the trace_buffer,
10266 + * call the interval timer and trace buffer interrupt handler.
10268 + * The value read from the pm_status register is stored in the
10269 +	 * pfm_arch_context structure for use by other routines. Note that
10270 + * reading the pm_status register resets the interrupt flags to zero.
10271 + * Hence, it is important that the register is only read in one place.
10273 + * The pm_status reg interrupt reg format is:
10274 + * [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:]
10275 +	 * - pmd0 to pmd7 are the perf counter overflow interrupts.
10276 + * - intt is the interval timer overflowed interrupt.
10277 + * - tbf is the trace buffer full interrupt.
10278 + * - tbu is the trace buffer underflow interrupt.
10279 + * - The pmd0 bit is the MSB of the 32 bit register.
10281 + ctx_arch->last_read_pm_status = last_read_pm_status =
10282 + info->get_and_clear_pm_interrupts(cpu);
10284 + /* Set flag for pfm_cell_get_ovfl_pmds() routine so it knows
10285 + * last_read_pm_status was updated by the interrupt handler.
10287 + ctx_arch->last_read_updated = 1;
10289 + if (last_read_pm_status & CBE_PM_ALL_OVERFLOW_INTR)
10290 + /* At least one counter overflowed. */
10291 + pfm_interrupt_handler(instruction_pointer(regs), regs);
10293 + if (last_read_pm_status & (CBE_PM_INTERVAL_INTR |
10294 + CBE_PM_TRACE_BUFFER_FULL_INTR |
10295 + CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR))
10296 + /* Trace buffer or interval timer overflow. */
10297 + handle_trace_buffer_interrupts(instruction_pointer(regs),
10298 + regs, ctx, last_read_pm_status);
10300 +	/* The interrupt setting is the value written to the pm_status
10301 + * register. It is saved in the context when the register is
10304 + info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu),
10305 + ctx->active_set->pmcs[CELL_PMC_PM_STATUS]);
10307 +	/* Writes to the various performance counters only write to a
10308 +	 * latch. The new values (interrupt setting bits, reset counter value,
10309 +	 * etc.) are not copied to the actual registers until the performance
10310 +	 * monitor is enabled. In order to get this to work as desired, the
10311 +	 * performance monitor needs to be disabled while writing to the
10312 +	 * latches. This is a HW design issue.
10314 + info->enable_pm(cpu);
10318 +static struct pfm_cell_platform_pmu_info ps3_platform_pmu_info = {
10319 +#ifdef CONFIG_PPC_PS3
10320 + .read_ctr = ps3_read_ctr,
10321 + .write_ctr = ps3_write_ctr,
10322 + .write_pm07_control = ps3_write_pm07_control,
10323 + .write_pm = ps3_write_pm,
10324 + .enable_pm = ps3_enable_pm,
10325 + .disable_pm = ps3_disable_pm,
10326 + .enable_pm_interrupts = ps3_enable_pm_interrupts,
10327 + .get_and_clear_pm_interrupts = ps3_get_and_clear_pm_interrupts,
10328 + .get_hw_thread_id = ps3_get_hw_thread_id,
10329 + .get_cpu_ppe_priv_regs = NULL,
10330 + .get_cpu_pmd_regs = NULL,
10331 + .get_cpu_mic_tm_regs = NULL,
10332 + .rtas_token = NULL,
10333 + .rtas_call = NULL,
10337 +static struct pfm_cell_platform_pmu_info native_platform_pmu_info = {
10338 +#ifdef CONFIG_PPC_CELL_NATIVE
10339 + .read_ctr = cbe_read_ctr,
10340 + .write_ctr = cbe_write_ctr,
10341 + .write_pm07_control = cbe_write_pm07_control,
10342 + .write_pm = cbe_write_pm,
10343 + .enable_pm = cbe_enable_pm,
10344 + .disable_pm = cbe_disable_pm,
10345 + .enable_pm_interrupts = cbe_enable_pm_interrupts,
10346 + .get_and_clear_pm_interrupts = cbe_get_and_clear_pm_interrupts,
10347 + .get_hw_thread_id = cbe_get_hw_thread_id,
10348 + .get_cpu_ppe_priv_regs = cbe_get_cpu_ppe_priv_regs,
10349 + .get_cpu_pmd_regs = cbe_get_cpu_pmd_regs,
10350 + .get_cpu_mic_tm_regs = cbe_get_cpu_mic_tm_regs,
10351 + .rtas_token = rtas_token,
10352 + .rtas_call = rtas_call,
10356 +static struct pfm_arch_pmu_info pfm_cell_pmu_info = {
10357 + .pmu_style = PFM_POWERPC_PMU_CELL,
10358 + .acquire_pmu = pfm_cell_acquire_pmu,
10359 + .release_pmu = pfm_cell_release_pmu,
10360 + .write_pmc = pfm_cell_write_pmc,
10361 + .write_pmd = pfm_cell_write_pmd,
10362 + .read_pmd = pfm_cell_read_pmd,
10363 + .enable_counters = pfm_cell_enable_counters,
10364 + .disable_counters = pfm_cell_disable_counters,
10365 + .irq_handler = pfm_cell_irq_handler,
10366 + .get_ovfl_pmds = pfm_cell_get_ovfl_pmds,
10367 + .restore_pmcs = pfm_cell_restore_pmcs,
10368 + .restore_pmds = pfm_cell_restore_pmds,
10369 + .ctxswout_thread = pfm_cell_ctxswout_thread,
10370 + .load_context = pfm_cell_load_context,
10371 + .unload_context = pfm_cell_unload_context,
10374 +static struct pfm_pmu_config pfm_cell_pmu_conf = {
10375 + .pmu_name = "Cell",
10376 + .version = "0.1",
10377 + .counter_width = 32,
10378 + .pmd_desc = pfm_cell_pmd_desc,
10379 + .pmc_desc = pfm_cell_pmc_desc,
10380 + .num_pmc_entries = PFM_PM_NUM_PMCS,
10381 + .num_pmd_entries = PFM_PM_NUM_PMDS,
10382 + .probe_pmu = pfm_cell_probe_pmu,
10383 + .pmu_info = &pfm_cell_pmu_info,
10384 + .flags = PFM_PMU_BUILTIN_FLAG,
10385 + .owner = THIS_MODULE,
10389 + * pfm_cell_platform_probe
10391 + * If we're on a system without the firmware rtas call available, set up the
10392 + * PMC write-checker for all the pmX_event control registers.
10394 +static void pfm_cell_platform_probe(void)
10396 + if (machine_is(celleb)) {
10398 + pfm_cell_pmu_conf.pmc_write_check = pfm_cell_pmc_check;
10399 + for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++)
10400 + pfm_cell_pmc_desc[cnum].type |= PFM_REG_WC;
10403 + if (machine_is(ps3))
10404 + pfm_cell_pmu_info.platform_info = &ps3_platform_pmu_info;
10406 + pfm_cell_pmu_info.platform_info = &native_platform_pmu_info;
10409 +static int __init pfm_cell_pmu_init_module(void)
10411 + pfm_cell_platform_probe();
10412 + return pfm_pmu_register(&pfm_cell_pmu_conf);
10415 +static void __exit pfm_cell_pmu_cleanup_module(void)
10417 + pfm_pmu_unregister(&pfm_cell_pmu_conf);
10420 +module_init(pfm_cell_pmu_init_module);
10421 +module_exit(pfm_cell_pmu_cleanup_module);
10422 diff --git a/arch/powerpc/perfmon/perfmon_power4.c b/arch/powerpc/perfmon/perfmon_power4.c
10423 new file mode 100644
10424 index 0000000..eba9e8c
10426 +++ b/arch/powerpc/perfmon/perfmon_power4.c
10429 + * This file contains the POWER4 PMU register description tables
10430 + * and pmc checker used by perfmon.c.
10432 + * Copyright (c) 2007, IBM Corporation.
10434 + * Based on a simple modification of perfmon_power5.c for POWER4 by
10435 + * Corey Ashford <cjashfor@us.ibm.com>.
10437 + * This program is free software; you can redistribute it and/or
10438 + * modify it under the terms of version 2 of the GNU General Public
10439 + * License as published by the Free Software Foundation.
10441 + * This program is distributed in the hope that it will be useful,
10442 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10443 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10444 + * General Public License for more details.
10446 + * You should have received a copy of the GNU General Public License
10447 + * along with this program; if not, write to the Free Software
10448 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
10451 +#include <linux/module.h>
10452 +#include <linux/perfmon_kern.h>
10454 +MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
10455 +MODULE_DESCRIPTION("POWER4 PMU description table");
10456 +MODULE_LICENSE("GPL");
10458 +static struct pfm_regmap_desc pfm_power4_pmc_desc[] = {
10459 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
10460 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
10461 +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
10463 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power4_pmc_desc)
10465 +/* The TB and PURR registers are read-only. Also, note that the TB register
10466 + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
10467 + * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
10469 +static struct pfm_regmap_desc pfm_power4_pmd_desc[] = {
10470 +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
10471 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
10472 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
10473 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
10474 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
10475 +/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
10476 +/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
10477 +/* pmd7 */ PMD_D(PFM_REG_C, "PMC7", SPRN_PMC7),
10478 +/* pmd8 */ PMD_D(PFM_REG_C, "PMC8", SPRN_PMC8)
10480 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power4_pmd_desc)
10482 +static int pfm_power4_probe_pmu(void)
10484 + unsigned long pvr = mfspr(SPRN_PVR);
10485 + int ver = PVR_VER(pvr);
10487 + if ((ver == PV_POWER4) || (ver == PV_POWER4p))
10493 +static void pfm_power4_write_pmc(unsigned int cnum, u64 value)
10495 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
10497 + mtspr(SPRN_MMCR0, value);
10500 + mtspr(SPRN_MMCR1, value);
10503 + mtspr(SPRN_MMCRA, value);
10510 +static void pfm_power4_write_pmd(unsigned int cnum, u64 value)
10512 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
10514 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
10516 + mtspr(SPRN_PMC1, value & ovfl_mask);
10519 + mtspr(SPRN_PMC2, value & ovfl_mask);
10522 + mtspr(SPRN_PMC3, value & ovfl_mask);
10525 + mtspr(SPRN_PMC4, value & ovfl_mask);
10528 + mtspr(SPRN_PMC5, value & ovfl_mask);
10531 + mtspr(SPRN_PMC6, value & ovfl_mask);
10534 + mtspr(SPRN_PMC7, value & ovfl_mask);
10537 + mtspr(SPRN_PMC8, value & ovfl_mask);
10541 + /* Ignore writes to read-only registers. */
10548 +static u64 pfm_power4_read_pmd(unsigned int cnum)
10550 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
10552 + return mfspr(SPRN_PMC1);
10554 + return mfspr(SPRN_PMC2);
10556 + return mfspr(SPRN_PMC3);
10558 + return mfspr(SPRN_PMC4);
10560 + return mfspr(SPRN_PMC5);
10562 + return mfspr(SPRN_PMC6);
10564 + return mfspr(SPRN_PMC7);
10566 + return mfspr(SPRN_PMC8);
10568 + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
10570 + if (cpu_has_feature(CPU_FTR_PURR))
10571 + return mfspr(SPRN_PURR);
10579 +/* forward decl */
10580 +static void pfm_power4_disable_counters(struct pfm_context *ctx,
10581 + struct pfm_event_set *set);
10584 + * pfm_power4_enable_counters
10587 +static void pfm_power4_enable_counters(struct pfm_context *ctx,
10588 + struct pfm_event_set *set)
10590 + unsigned int i, max_pmc;
10592 + /* Make sure the counters are disabled before touching the other
10593 + control registers */
10594 + pfm_power4_disable_counters(ctx, set);
10596 + max_pmc = ctx->regs.max_pmc;
10598 + /* Write MMCR0 last, and a fairly easy way to do this is to write
10599 + the registers in the reverse order */
10600 + for (i = max_pmc; i != 0; i--)
10601 + if (test_bit(i - 1, set->used_pmcs))
10602 + pfm_power4_write_pmc(i - 1, set->pmcs[i - 1]);
10606 + * pfm_power4_disable_counters
10609 +static void pfm_power4_disable_counters(struct pfm_context *ctx,
10610 + struct pfm_event_set *set)
10612 + /* Set the Freeze Counters bit */
10613 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
10614 + asm volatile ("sync");
10618 + * pfm_power4_get_ovfl_pmds
10620 + * Determine which counters in this set have overflowed and fill in the
10621 + * set->povfl_pmds mask and set->npend_ovfls count.
10623 +static void pfm_power4_get_ovfl_pmds(struct pfm_context *ctx,
10624 + struct pfm_event_set *set)
10627 + unsigned int max_pmd = ctx->regs.max_intr_pmd;
10628 + u64 *used_pmds = set->used_pmds;
10629 + u64 *cntr_pmds = ctx->regs.cnt_pmds;
10630 + u64 width_mask = 1 << pfm_pmu_conf->counter_width;
10631 + u64 new_val, mask[PFM_PMD_BV];
10633 + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
10634 + cast_ulp(used_pmds), max_pmd);
10636 + for (i = 0; i < max_pmd; i++) {
10637 + if (test_bit(i, mask)) {
10638 + new_val = pfm_power4_read_pmd(i);
10639 + if (new_val & width_mask) {
10640 + set_bit(i, set->povfl_pmds);
10641 + set->npend_ovfls++;
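For the 31-bit counter width declared in pfm_power4_pmu_conf below, width_mask is 1 << 31, so a freshly read counter value with bit 31 set has crossed the overflow threshold. A minimal sketch of that test (example value made up; 1ULL is used here only to keep the shift well defined in portable C):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int counter_width = 31;		/* as in pfm_power4_pmu_conf */
	uint64_t width_mask = 1ULL << counter_width;	/* 0x80000000 */
	uint64_t new_val = 0x80000005;			/* hypothetical PMC read */

	if (new_val & width_mask)
		printf("counter overflowed\n");
	return 0;
}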
10647 +static void pfm_power4_irq_handler(struct pt_regs *regs,
10648 + struct pfm_context *ctx)
10652 +	/* Disable the counters (set the freeze bit) to not pollute
10655 + mmcr0 = mfspr(SPRN_MMCR0);
10656 + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
10658 + /* Set the PMM bit (see comment below). */
10659 + mtmsrd(mfmsr() | MSR_PMM);
10661 + pfm_interrupt_handler(instruction_pointer(regs), regs);
10663 + mmcr0 = mfspr(SPRN_MMCR0);
10666 + * Reset the perfmon trigger if
10667 + * not in masking mode.
10669 + if (ctx->state != PFM_CTX_MASKED)
10670 + mmcr0 |= MMCR0_PMXE;
10673 + * We must clear the PMAO bit on some (GQ) chips. Just do it
10676 + mmcr0 &= ~MMCR0_PMAO;
10679 + * Now clear the freeze bit, counting will not start until we
10680 + * rfid from this exception, because only at that point will
10681 + * the PMM bit be cleared.
10683 + mmcr0 &= ~MMCR0_FC;
10684 + mtspr(SPRN_MMCR0, mmcr0);
10687 +static void pfm_power4_resend_irq(struct pfm_context *ctx)
10690 + * Assert the PMAO bit to cause a PMU interrupt. Make sure we
10691 + * trigger the edge detection circuitry for PMAO
10693 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
10694 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
10697 +struct pfm_arch_pmu_info pfm_power4_pmu_info = {
10698 + .pmu_style = PFM_POWERPC_PMU_POWER4,
10699 + .write_pmc = pfm_power4_write_pmc,
10700 + .write_pmd = pfm_power4_write_pmd,
10701 + .read_pmd = pfm_power4_read_pmd,
10702 + .irq_handler = pfm_power4_irq_handler,
10703 + .get_ovfl_pmds = pfm_power4_get_ovfl_pmds,
10704 + .enable_counters = pfm_power4_enable_counters,
10705 + .disable_counters = pfm_power4_disable_counters,
10706 + .resend_irq = pfm_power4_resend_irq
10710 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
10712 +static struct pfm_pmu_config pfm_power4_pmu_conf = {
10713 + .pmu_name = "POWER4",
10714 + .counter_width = 31,
10715 + .pmd_desc = pfm_power4_pmd_desc,
10716 + .pmc_desc = pfm_power4_pmc_desc,
10717 + .num_pmc_entries = PFM_PM_NUM_PMCS,
10718 + .num_pmd_entries = PFM_PM_NUM_PMDS,
10719 + .probe_pmu = pfm_power4_probe_pmu,
10720 + .pmu_info = &pfm_power4_pmu_info,
10721 + .flags = PFM_PMU_BUILTIN_FLAG,
10722 + .owner = THIS_MODULE
10725 +static int __init pfm_power4_pmu_init_module(void)
10727 + return pfm_pmu_register(&pfm_power4_pmu_conf);
10730 +static void __exit pfm_power4_pmu_cleanup_module(void)
10732 + pfm_pmu_unregister(&pfm_power4_pmu_conf);
10735 +module_init(pfm_power4_pmu_init_module);
10736 +module_exit(pfm_power4_pmu_cleanup_module);
10737 diff --git a/arch/powerpc/perfmon/perfmon_power5.c b/arch/powerpc/perfmon/perfmon_power5.c
10738 new file mode 100644
10739 index 0000000..f4bb1ac
10741 +++ b/arch/powerpc/perfmon/perfmon_power5.c
10744 + * This file contains the POWER5 PMU register description tables
10745 + * and pmc checker used by perfmon.c.
10747 + * Copyright (c) 2005 David Gibson, IBM Corporation.
10749 + * Based on perfmon_p6.c:
10750 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
10751 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
10753 + * This program is free software; you can redistribute it and/or
10754 + * modify it under the terms of version 2 of the GNU General Public
10755 + * License as published by the Free Software Foundation.
10757 + * This program is distributed in the hope that it will be useful,
10758 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10759 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10760 + * General Public License for more details.
10762 + * You should have received a copy of the GNU General Public License
10763 + * along with this program; if not, write to the Free Software
10764 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
10767 +#include <linux/module.h>
10768 +#include <linux/perfmon_kern.h>
10770 +MODULE_AUTHOR("David Gibson <dwg@au1.ibm.com>");
10771 +MODULE_DESCRIPTION("POWER5 PMU description table");
10772 +MODULE_LICENSE("GPL");
10774 +static struct pfm_regmap_desc pfm_power5_pmc_desc[] = {
10775 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
10776 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
10777 +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
10779 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power5_pmc_desc)
10781 +/* The TB and PURR registers are read-only. Also, note that the TB register
10782 + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
10783 + * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
10785 +static struct pfm_regmap_desc pfm_power5_pmd_desc[] = {
10786 +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
10787 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
10788 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
10789 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
10790 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
10791 +/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
10792 +/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
10793 +/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
10795 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power5_pmd_desc)
10797 +/* forward decl */
10798 +static void pfm_power5_disable_counters(struct pfm_context *ctx,
10799 + struct pfm_event_set *set);
10801 +static int pfm_power5_probe_pmu(void)
10803 + unsigned long pvr = mfspr(SPRN_PVR);
10805 + switch (PVR_VER(pvr)) {
10809 + return (PVR_REV(pvr) < 0x300) ? 0 : -1;
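The probes key off the processor version register. In the usual powerpc PVR layout (assumed here), the version number sits in the upper 16 bits and the revision in the lower 16 bits; a POWER5+ whose revision is below 0x300 is claimed by this driver, per the check above. A sketch with a purely illustrative PVR value:

#include <stdint.h>
#include <stdio.h>

#define EX_PVR_VER(pvr) (((pvr) >> 16) & 0xffff)	/* processor version  */
#define EX_PVR_REV(pvr) ((pvr) & 0xffff)		/* processor revision */

int main(void)
{
	uint32_t pvr = 0x003b0201;	/* made-up PVR: version 0x003b, rev 0x0201 */

	printf("version 0x%04x revision 0x%04x\n",
	       (unsigned)EX_PVR_VER(pvr), (unsigned)EX_PVR_REV(pvr));
	return 0;
}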
10815 +static void pfm_power5_write_pmc(unsigned int cnum, u64 value)
10817 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
10819 + mtspr(SPRN_MMCR0, value);
10822 + mtspr(SPRN_MMCR1, value);
10825 + mtspr(SPRN_MMCRA, value);
10832 +static void pfm_power5_write_pmd(unsigned int cnum, u64 value)
10834 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
10836 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
10838 + mtspr(SPRN_PMC1, value & ovfl_mask);
10841 + mtspr(SPRN_PMC2, value & ovfl_mask);
10844 + mtspr(SPRN_PMC3, value & ovfl_mask);
10847 + mtspr(SPRN_PMC4, value & ovfl_mask);
10850 + mtspr(SPRN_PMC5, value & ovfl_mask);
10853 + mtspr(SPRN_PMC6, value & ovfl_mask);
10857 + /* Ignore writes to read-only registers. */
10864 +static u64 pfm_power5_read_pmd(unsigned int cnum)
10866 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
10868 + return mfspr(SPRN_PMC1);
10870 + return mfspr(SPRN_PMC2);
10872 + return mfspr(SPRN_PMC3);
10874 + return mfspr(SPRN_PMC4);
10876 + return mfspr(SPRN_PMC5);
10878 + return mfspr(SPRN_PMC6);
10880 + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
10882 + if (cpu_has_feature(CPU_FTR_PURR))
10883 + return mfspr(SPRN_PURR);
10892 + * pfm_power5_enable_counters
10895 +static void pfm_power5_enable_counters(struct pfm_context *ctx,
10896 + struct pfm_event_set *set)
10898 + unsigned int i, max_pmc;
10901 + * Make sure the counters are disabled before touching the
10902 + * other control registers
10904 + pfm_power5_disable_counters(ctx, set);
10906 + max_pmc = ctx->regs.max_pmc;
10909 + * Write MMCR0 last, and a fairly easy way to do
10910 + * this is to write the registers in the reverse
10913 + for (i = max_pmc; i != 0; i--)
10914 + if (test_bit(i - 1, set->used_pmcs))
10915 + pfm_power5_write_pmc(i - 1, set->pmcs[i - 1]);
10919 + * pfm_power5_disable_counters
10921 + * Just need to set the freeze bit in MMCR0.
10923 +static void pfm_power5_disable_counters(struct pfm_context *ctx,
10924 + struct pfm_event_set *set)
10926 + /* Set the Freeze Counters bit */
10927 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
10928 + asm volatile ("sync");
10932 + * pfm_power5_get_ovfl_pmds
10934 + * Determine which counters in this set have overflowed and fill in the
10935 + * set->povfl_pmds mask and set->npend_ovfls count.
10937 +static void pfm_power5_get_ovfl_pmds(struct pfm_context *ctx,
10938 + struct pfm_event_set *set)
10941 + unsigned int max = ctx->regs.max_intr_pmd;
10942 + u64 *used_pmds = set->used_pmds;
10943 + u64 *intr_pmds = ctx->regs.intr_pmds;
10944 + u64 width_mask = 1 << pfm_pmu_conf->counter_width;
10945 + u64 new_val, mask[PFM_PMD_BV];
10947 + bitmap_and(cast_ulp(mask), cast_ulp(intr_pmds),
10948 + cast_ulp(used_pmds), max);
10950 + * If either PMC5 or PMC6 is not being used, just zero out the unused
10951 + * ones so that they won't interrupt again for another 2^31 counts.
10952 + * Note that if no other counters overflowed, set->npend_ovfls will
10953 + * be zero upon returning from this call (i.e. a spurious
10954 + * interrupt), but that should be ok.
10956 + * If neither PMC5 nor PMC6 are used, the counters should be frozen
10957 + * via MMCR0_FC5_6 and zeroed out.
10959 + * If both PMC5 and PMC6 are used, they can be handled correctly by
10960 + * the loop that follows.
10963 + if (!test_bit(5, cast_ulp(used_pmds)))
10964 + mtspr(SPRN_PMC5, 0);
10965 + if (!test_bit(6, cast_ulp(used_pmds)))
10966 + mtspr(SPRN_PMC6, 0);
10968 + for (i = 0; i < max; i++) {
10969 + if (test_bit(i, mask)) {
10970 + new_val = pfm_power5_read_pmd(i);
10971 + if (new_val & width_mask) {
10972 + set_bit(i, set->povfl_pmds);
10973 + set->npend_ovfls++;
10979 +static void pfm_power5_irq_handler(struct pt_regs *regs,
10980 + struct pfm_context *ctx)
10984 +	/* Disable the counters (set the freeze bit) to not pollute
10987 + mmcr0 = mfspr(SPRN_MMCR0);
10988 + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
10990 + /* Set the PMM bit (see comment below). */
10991 + mtmsrd(mfmsr() | MSR_PMM);
10993 + pfm_interrupt_handler(instruction_pointer(regs), regs);
10995 + mmcr0 = mfspr(SPRN_MMCR0);
10998 + * Reset the perfmon trigger if
10999 + * not in masking mode.
11001 + if (ctx->state != PFM_CTX_MASKED)
11002 + mmcr0 |= MMCR0_PMXE;
11005 + * We must clear the PMAO bit on some (GQ) chips. Just do it
11008 + mmcr0 &= ~MMCR0_PMAO;
11011 + * Now clear the freeze bit, counting will not start until we
11012 + * rfid from this exception, because only at that point will
11013 + * the PMM bit be cleared.
11015 + mmcr0 &= ~MMCR0_FC;
11016 + mtspr(SPRN_MMCR0, mmcr0);
11019 +static void pfm_power5_resend_irq(struct pfm_context *ctx)
11022 + * Assert the PMAO bit to cause a PMU interrupt. Make sure we
11023 + * trigger the edge detection circuitry for PMAO
11025 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
11026 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
11029 +struct pfm_arch_pmu_info pfm_power5_pmu_info = {
11030 + .pmu_style = PFM_POWERPC_PMU_POWER5,
11031 + .write_pmc = pfm_power5_write_pmc,
11032 + .write_pmd = pfm_power5_write_pmd,
11033 + .read_pmd = pfm_power5_read_pmd,
11034 + .irq_handler = pfm_power5_irq_handler,
11035 + .get_ovfl_pmds = pfm_power5_get_ovfl_pmds,
11036 + .enable_counters = pfm_power5_enable_counters,
11037 + .disable_counters = pfm_power5_disable_counters,
11038 + .resend_irq = pfm_power5_resend_irq
11042 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
11044 +static struct pfm_pmu_config pfm_power5_pmu_conf = {
11045 + .pmu_name = "POWER5",
11046 + .counter_width = 31,
11047 + .pmd_desc = pfm_power5_pmd_desc,
11048 + .pmc_desc = pfm_power5_pmc_desc,
11049 + .num_pmc_entries = PFM_PM_NUM_PMCS,
11050 + .num_pmd_entries = PFM_PM_NUM_PMDS,
11051 + .probe_pmu = pfm_power5_probe_pmu,
11052 + .pmu_info = &pfm_power5_pmu_info,
11053 + .flags = PFM_PMU_BUILTIN_FLAG,
11054 + .owner = THIS_MODULE
11057 +static int __init pfm_power5_pmu_init_module(void)
11059 + return pfm_pmu_register(&pfm_power5_pmu_conf);
11062 +static void __exit pfm_power5_pmu_cleanup_module(void)
11064 + pfm_pmu_unregister(&pfm_power5_pmu_conf);
11067 +module_init(pfm_power5_pmu_init_module);
11068 +module_exit(pfm_power5_pmu_cleanup_module);
11069 diff --git a/arch/powerpc/perfmon/perfmon_power6.c b/arch/powerpc/perfmon/perfmon_power6.c
11070 new file mode 100644
11071 index 0000000..7882feb
11073 +++ b/arch/powerpc/perfmon/perfmon_power6.c
11076 + * This file contains the POWER6 PMU register description tables
11077 + * and pmc checker used by perfmon.c.
11079 + * Copyright (c) 2007, IBM Corporation
11081 + * Based on perfmon_power5.c, and written by Carl Love <carll@us.ibm.com>
11082 + * and Kevin Corry <kevcorry@us.ibm.com>. Some fixes and refinement by
11083 + * Corey Ashford <cjashfor@us.ibm.com>
11085 + * This program is free software; you can redistribute it and/or
11086 + * modify it under the terms of version 2 of the GNU General Public
11087 + * License as published by the Free Software Foundation.
11089 + * This program is distributed in the hope that it will be useful,
11090 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11091 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11092 + * General Public License for more details.
11094 + * You should have received a copy of the GNU General Public License
11095 + * along with this program; if not, write to the Free Software
11096 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
11099 +#include <linux/module.h>
11100 +#include <linux/perfmon_kern.h>
11102 +MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
11103 +MODULE_DESCRIPTION("POWER6 PMU description table");
11104 +MODULE_LICENSE("GPL");
11106 +static struct pfm_regmap_desc pfm_power6_pmc_desc[] = {
11107 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
11108 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
11109 +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
11111 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power6_pmc_desc)
11112 +#define PFM_DELTA_TB 10000 /* Not real registers */
11113 +#define PFM_DELTA_PURR 10001
11116 + * Counters wrap to zero after reaching 2^32-1. Note: an
11117 + * interrupt is generated at the transition from 2^31-1 to 2^31.
11119 +#define OVERFLOW_VALUE 0x100000000UL
11121 +/* The TB and PURR registers are read-only. Also, note that the TB register
11122 + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
11123 + * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
11125 +static struct pfm_regmap_desc pfm_power6_pmd_desc[] = {
11126 +	/* On POWER6, PMC5 and PMC6 are not writable, do not
11127 +	 * generate interrupts, and do not qualify their counts
11128 +	 * based on problem mode, supervisor mode or hypervisor mode.
11129 +	 * These two counters are implemented as virtual counters
11130 +	 * to make them appear to work like the other counters. A
11131 +	 * kernel timer is used to sample the real PMC5 and PMC6 and
11132 +	 * update the virtual counters.
11134 +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
11135 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
11136 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
11137 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
11138 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
11139 +/* pmd5 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC5", SPRN_PMC5),
11140 +/* pmd6 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC6", SPRN_PMC6),
11141 +/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
11142 +/* delta tb */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_TB", PFM_DELTA_TB),
11143 +/* delta purr */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_PURR", PFM_DELTA_PURR),
11146 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc)
11148 +u32 pmc5_start_save[NR_CPUS];
11149 +u32 pmc6_start_save[NR_CPUS];
11151 +static struct timer_list pmc5_6_update[NR_CPUS];
11152 +u64 enable_cntrs_cnt;
11153 +u64 disable_cntrs_cnt;
11155 +u64 pm5_6_interrupt;
11156 +u64 pm1_4_interrupt;
11157 +/* need ctx_arch for kernel timer. Can't get it in context of the kernel
11160 +struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS];
11161 +long int update_time;
11163 +static void delta(int cpu_num, struct pfm_arch_context *ctx_arch)
11169 + tmp5 = (u32) mfspr(SPRN_PMC5);
11170 + tmp6 = (u32) mfspr(SPRN_PMC6);
11173 + * The following difference calculation relies on 32-bit modular
11174 + * arithmetic for the deltas to come out correct (especially in the
11175 + * presence of a 32-bit counter wrap).
11177 + ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]);
11178 + ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]);
11180 + pmc5_start_save[cpu_num] = tmp5;
11181 + pmc6_start_save[cpu_num] = tmp6;
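A standalone sketch of the wrap-safe delta used above: because the subtraction is performed in 32-bit unsigned arithmetic, the result is correct even if the hardware counter wrapped once between samples (the values below are made up).

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t start = 0xfffffff0;	/* last saved PMC5 value       */
	uint32_t now   = 0x00000010;	/* current read, after a wrap  */
	uint64_t soft  = 0;		/* 64-bit software accumulator */

	soft += (uint64_t)(uint32_t)(now - start);	/* 0x20 even though now < start */
	printf("delta = %llu\n", (unsigned long long)soft);
	return 0;
}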
11187 +static void pmc5_6_updater(unsigned long cpu_num)
11189 + /* update the virtual pmd 5 and pmd 6 counters */
11191 + delta(cpu_num, pmc5_6_ctx_arch[cpu_num]);
11192 + mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time);
11196 +static int pfm_power6_probe_pmu(void)
11198 + unsigned long pvr = mfspr(SPRN_PVR);
11200 + switch (PVR_VER(pvr)) {
11204 + /* If this is a POWER5+ and the revision is less than 0x300,
11205 + don't treat it as a POWER6. */
11206 + return (PVR_REV(pvr) < 0x300) ? -1 : 0;
11212 +static void pfm_power6_write_pmc(unsigned int cnum, u64 value)
11214 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
11216 + mtspr(SPRN_MMCR0, value);
11219 + mtspr(SPRN_MMCR1, value);
11222 + mtspr(SPRN_MMCRA, value);
11229 +static void pfm_power6_write_pmd(unsigned int cnum, u64 value)
11231 + /* On POWER 6 PMC5 and PMC6 are implemented as
11232 + * virtual counters. See comment in pfm_power6_pmd_desc
11235 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
11237 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11239 + mtspr(SPRN_PMC1, value & ovfl_mask);
11242 + mtspr(SPRN_PMC2, value & ovfl_mask);
11245 + mtspr(SPRN_PMC3, value & ovfl_mask);
11248 + mtspr(SPRN_PMC4, value & ovfl_mask);
11252 + /* Ignore writes to read-only registers. */
11259 +static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum)
11261 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
11262 + int cpu_num = smp_processor_id();
11264 + /* On POWER 6 PMC5 and PMC6 are implemented as
11265 + * virtual counters. See comment in pfm_power6_pmd_desc
11269 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11271 + return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]);
11275 + return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]);
11278 + case PFM_DELTA_TB:
11279 + return ctx_arch->delta_tb
11280 + + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
11281 + - ctx_arch->delta_tb_start;
11284 + case PFM_DELTA_PURR:
11285 + return ctx_arch->delta_purr
11286 + + mfspr(SPRN_PURR)
11287 + - ctx_arch->delta_purr_start;
11295 +void pfm_power6_swrite(struct pfm_context *ctx, unsigned int cnum,
11298 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
11299 + int cpu_num = smp_processor_id();
11301 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11303 + pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
11304 + ctx_arch->powergs_pmc5 = val;
11308 + pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
11309 + ctx_arch->powergs_pmc6 = val;
11312 + case PFM_DELTA_TB:
11313 + ctx_arch->delta_tb_start =
11314 + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL));
11315 + ctx_arch->delta_tb = val;
11318 + case PFM_DELTA_PURR:
11319 + ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
11320 + ctx_arch->delta_purr = val;
11328 +static u64 pfm_power6_read_pmd(unsigned int cnum)
11330 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11332 + return mfspr(SPRN_PMC1);
11334 + return mfspr(SPRN_PMC2);
11336 + return mfspr(SPRN_PMC3);
11338 + return mfspr(SPRN_PMC4);
11340 + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
11342 + if (cpu_has_feature(CPU_FTR_PURR))
11343 + return mfspr(SPRN_PURR);
11353 + * pfm_power6_enable_counters
11356 +static void pfm_power6_enable_counters(struct pfm_context *ctx,
11357 + struct pfm_event_set *set)
11360 + unsigned int i, max_pmc;
11361 + int cpu_num = smp_processor_id();
11362 + struct pfm_arch_context *ctx_arch;
11364 + enable_cntrs_cnt++;
11366 + /* need the ctx passed down to the routine */
11367 + ctx_arch = pfm_ctx_arch(ctx);
11368 + max_pmc = ctx->regs.max_pmc;
11370 +	/* Write MMCR0 last; a fairly easy way to do this is to write
11371 +	   the registers in reverse order */
11372 + for (i = max_pmc; i != 0; i--)
11373 + if (test_bit(i - 1, set->used_pmcs))
11374 + pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]);
11376 + /* save current free running HW event count */
11377 + pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
11378 + pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
11380 + ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
11382 + if (cpu_has_feature(CPU_FTR_PURR))
11383 + ctx_arch->delta_tb_start =
11384 + ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
11386 + ctx_arch->delta_tb_start = 0;
11388 + /* Start kernel timer for this cpu to periodically update
11389 + * the virtual counters.
11391 + init_timer(&pmc5_6_update[cpu_num]);
11392 + pmc5_6_update[cpu_num].function = pmc5_6_updater;
11393 + pmc5_6_update[cpu_num].data = (unsigned long) cpu_num;
11394 + pmc5_6_update[cpu_num].expires = jiffies + update_time;
11395 + /* context for this timer, timer will be removed if context
11396 + * is switched because the counters will be stopped first.
11397 + * NEEDS WORK, I think this is all ok, a little concerned about a
11398 + * race between the kernel timer going off right as the counters
11399 + * are being stopped and the context switching. Need to think
11402 + pmc5_6_ctx_arch[cpu_num] = ctx_arch;
11403 + add_timer(&pmc5_6_update[cpu_num]);
11407 + * pfm_power6_disable_counters
11410 +static void pfm_power6_disable_counters(struct pfm_context *ctx,
11411 + struct pfm_event_set *set)
11413 + struct pfm_arch_context *ctx_arch;
11414 + int cpu_num = smp_processor_id();
11416 + disable_cntrs_cnt++;
11418 + /* Set the Freeze Counters bit */
11419 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
11420 + asm volatile ("sync");
11422 + /* delete kernel update timer */
11423 + del_timer_sync(&pmc5_6_update[cpu_num]);
11425 + /* Update the virtual pmd 5 and 6 counters from the free running
11428 + ctx_arch = pfm_ctx_arch(ctx);
11429 + delta(cpu_num, ctx_arch);
11431 + ctx_arch->delta_tb +=
11432 + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
11433 + - ctx_arch->delta_tb_start;
11435 + ctx_arch->delta_purr += mfspr(SPRN_PURR)
11436 + - ctx_arch->delta_purr_start;
11440 + * pfm_power6_get_ovfl_pmds
11442 + * Determine which counters in this set have overflowed and fill in the
11443 + * set->povfl_pmds mask and set->npend_ovfls count.
11445 +static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx,
11446 + struct pfm_event_set *set)
11449 + unsigned int first_intr_pmd = ctx->regs.first_intr_pmd;
11450 + unsigned int max_intr_pmd = ctx->regs.max_intr_pmd;
11451 + u64 *used_pmds = set->used_pmds;
11452 + u64 *cntr_pmds = ctx->regs.cnt_pmds;
11453 +	u64 width_mask = 1ULL << pfm_pmu_conf->counter_width;
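+	/*
+	 * Note: with counter_width = 31 (see pfm_power6_pmu_conf below),
+	 * width_mask selects bit 31, so any counter value with that bit
+	 * set is treated as having overflowed.
+	 */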
11454 + u64 new_val, mask[PFM_PMD_BV];
11456 + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd);
11458 + /* max_intr_pmd is actually the last interrupting pmd register + 1 */
11459 + for (i = first_intr_pmd; i < max_intr_pmd; i++) {
11460 + if (test_bit(i, mask)) {
11461 + new_val = pfm_power6_read_pmd(i);
11462 + if (new_val & width_mask) {
11463 + set_bit(i, set->povfl_pmds);
11464 + set->npend_ovfls++;
11470 +static void pfm_power6_irq_handler(struct pt_regs *regs,
11471 + struct pfm_context *ctx)
11476 +	/* Disable the counters (set the freeze bit) to not pollute
11479 + mmcr0 = mfspr(SPRN_MMCR0);
11480 + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
11481 + mmcra = mfspr(SPRN_MMCRA);
11483 + /* Set the PMM bit (see comment below). */
11484 + mtmsrd(mfmsr() | MSR_PMM);
11486 + pm1_4_interrupt++;
11488 + pfm_interrupt_handler(instruction_pointer(regs), regs);
11490 + mmcr0 = mfspr(SPRN_MMCR0);
11493 + * Reset the perfmon trigger if
11494 + * not in masking mode.
11496 + if (ctx->state != PFM_CTX_MASKED)
11497 + mmcr0 |= MMCR0_PMXE;
11500 + * Clear the PMU Alert Occurred bit
11502 + mmcr0 &= ~MMCR0_PMAO;
11504 + /* Clear the appropriate bits in the MMCRA. */
11505 + mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER);
11506 + mtspr(SPRN_MMCRA, mmcra);
11509 + * Now clear the freeze bit, counting will not start until we
11510 + * rfid from this exception, because only at that point will
11511 + * the PMM bit be cleared.
11513 + mmcr0 &= ~MMCR0_FC;
11514 + mtspr(SPRN_MMCR0, mmcr0);
11517 +static void pfm_power6_resend_irq(struct pfm_context *ctx)
11520 + * Assert the PMAO bit to cause a PMU interrupt. Make sure we
11521 + * trigger the edge detection circuitry for PMAO
11523 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
11524 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
11527 +struct pfm_arch_pmu_info pfm_power6_pmu_info = {
11528 + .pmu_style = PFM_POWERPC_PMU_POWER6,
11529 + .write_pmc = pfm_power6_write_pmc,
11530 + .write_pmd = pfm_power6_write_pmd,
11531 + .read_pmd = pfm_power6_read_pmd,
11532 + .irq_handler = pfm_power6_irq_handler,
11533 + .get_ovfl_pmds = pfm_power6_get_ovfl_pmds,
11534 + .enable_counters = pfm_power6_enable_counters,
11535 + .disable_counters = pfm_power6_disable_counters,
11536 + .resend_irq = pfm_power6_resend_irq
11540 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
11542 +static struct pfm_pmu_config pfm_power6_pmu_conf = {
11543 + .pmu_name = "POWER6",
11544 + .counter_width = 31,
11545 + .pmd_desc = pfm_power6_pmd_desc,
11546 + .pmc_desc = pfm_power6_pmc_desc,
11547 + .num_pmc_entries = PFM_PM_NUM_PMCS,
11548 + .num_pmd_entries = PFM_PM_NUM_PMDS,
11549 + .probe_pmu = pfm_power6_probe_pmu,
11550 + .pmu_info = &pfm_power6_pmu_info,
11551 + .pmd_sread = pfm_power6_sread,
11552 + .pmd_swrite = pfm_power6_swrite,
11553 + .flags = PFM_PMU_BUILTIN_FLAG,
11554 + .owner = THIS_MODULE
11557 +static int __init pfm_power6_pmu_init_module(void)
11560 + disable_cntrs_cnt = 0;
11561 + enable_cntrs_cnt = 0;
11563 + pm5_6_interrupt = 0;
11564 + pm1_4_interrupt = 0;
11566 + /* calculate the time for updating counters 5 and 6 */
11569 + * MAX_EVENT_RATE assumes a max instruction issue rate of 2
11570 + * instructions per clock cycle. Experience shows that this factor
11571 + * of 2 is more than adequate.
11574 +# define MAX_EVENT_RATE (ppc_proc_freq * 2)
11577 + * Calculate the time, in jiffies, it takes for event counter 5 or
11578 + * 6 to completely wrap when counting at the max event rate, and
11579 + * then figure on sampling at twice that rate.
11581 + update_time = (((unsigned long)HZ * OVERFLOW_VALUE)
11582 + / ((unsigned long)MAX_EVENT_RATE)) / 2;
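+	/*
+	 * Illustrative numbers (assuming HZ = 250 and a hypothetical
+	 * 4.7 GHz core): MAX_EVENT_RATE = 9.4e9 events/sec, so a 32-bit
+	 * counter wraps in about 2^32 / 9.4e9 ~= 0.46s, giving an
+	 * update_time of roughly 57 jiffies (~0.23s).
+	 */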
11584 + ret = pfm_pmu_register(&pfm_power6_pmu_conf);
11588 +static void __exit pfm_power6_pmu_cleanup_module(void)
11590 + pfm_pmu_unregister(&pfm_power6_pmu_conf);
11593 +module_init(pfm_power6_pmu_init_module);
11594 +module_exit(pfm_power6_pmu_cleanup_module);
11595 diff --git a/arch/powerpc/perfmon/perfmon_ppc32.c b/arch/powerpc/perfmon/perfmon_ppc32.c
11596 new file mode 100644
11597 index 0000000..76f0b84
11599 +++ b/arch/powerpc/perfmon/perfmon_ppc32.c
11602 + * This file contains the PPC32 PMU register description tables
11603 + * and pmc checker used by perfmon.c.
11605 + * Philip Mucci, mucci@cs.utk.edu
11607 + * Based on code from:
11608 + * Copyright (c) 2005 David Gibson, IBM Corporation.
11610 + * Based on perfmon_p6.c:
11611 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
11612 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
11614 + * This program is free software; you can redistribute it and/or
11615 + * modify it under the terms of version 2 of the GNU General Public
11616 + * License as published by the Free Software Foundation.
11618 + * This program is distributed in the hope that it will be useful,
11619 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11620 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11621 + * General Public License for more details.
11623 + * You should have received a copy of the GNU General Public License
11624 + * along with this program; if not, write to the Free Software
11625 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
11628 +#include <linux/module.h>
11629 +#include <linux/perfmon_kern.h>
11630 +#include <asm/reg.h>
11632 +MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>");
11633 +MODULE_DESCRIPTION("PPC32 PMU description table");
11634 +MODULE_LICENSE("GPL");
11636 +static struct pfm_pmu_config pfm_ppc32_pmu_conf;
11638 +static struct pfm_regmap_desc pfm_ppc32_pmc_desc[] = {
11639 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", 0x0, 0, 0, SPRN_MMCR0),
11640 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0x0, 0, 0, SPRN_MMCR1),
11641 +/* mmcr2 */ PMC_D(PFM_REG_I, "MMCR2", 0x0, 0, 0, SPRN_MMCR2),
11643 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_ppc32_pmc_desc)
11645 +static struct pfm_regmap_desc pfm_ppc32_pmd_desc[] = {
11646 +/* pmd0 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
11647 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
11648 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
11649 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
11650 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
11651 +/* pmd5 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
11653 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_ppc32_pmd_desc)
11655 +static void perfmon_perf_irq(struct pt_regs *regs)
11659 + /* BLATANTLY STOLEN FROM OPROFILE, then modified */
11661 + /* set the PMM bit (see comment below) */
11662 + mtmsr(mfmsr() | MSR_PMM);
11664 + pfm_interrupt_handler(instruction_pointer(regs), regs);
11666 + /* The freeze bit was set by the interrupt.
11667 + * Clear the freeze bit, and reenable the interrupt.
11668 + * The counters won't actually start until the rfi clears
11672 + /* Unfreezes the counters on this CPU, enables the interrupt,
11673 + * enables the counters to trigger the interrupt, and sets the
11674 + * counters to only count when the mark bit is not set.
11676 + mmcr0 = mfspr(SPRN_MMCR0);
11678 + mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0);
11679 + mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
11681 + mtspr(SPRN_MMCR0, mmcr0);
11684 +static int pfm_ppc32_probe_pmu(void)
11686 + enum ppc32_pmu_type pm_type;
11687 + int nmmcr = 0, npmds = 0, intsok = 0, i;
11688 + unsigned int pvr;
11691 + pvr = mfspr(SPRN_PVR);
11693 + switch (PVR_VER(pvr)) {
11694 + case 0x0004: /* 604 */
11696 + pm_type = PFM_POWERPC_PMU_604;
11700 +	case 0x0009: /* 604e */
11701 + case 0x000A: /* 604ev */
11703 + pm_type = PFM_POWERPC_PMU_604e;
11707 + case 0x0008: /* 750/740 */
11709 + pm_type = PFM_POWERPC_PMU_750;
11713 + case 0x7000: /* 750FX */
11716 + pm_type = PFM_POWERPC_PMU_750;
11719 + if ((pvr & 0xFF0F) >= 0x0203)
11722 + case 0x7002: /* 750GX */
11724 + pm_type = PFM_POWERPC_PMU_750;
11728 + case 0x000C: /* 7400 */
11730 + pm_type = PFM_POWERPC_PMU_7400;
11734 + case 0x800C: /* 7410 */
11736 + pm_type = PFM_POWERPC_PMU_7400;
11739 + if ((pvr & 0xFFFF) >= 0x01103)
11742 + case 0x8000: /* 7451/7441 */
11743 + case 0x8001: /* 7455/7445 */
11744 + case 0x8002: /* 7457/7447 */
11745 + case 0x8003: /* 7447A */
11746 + case 0x8004: /* 7448 */
11748 + pm_type = PFM_POWERPC_PMU_7450;
11749 + nmmcr = 3; npmds = 6;
11753 + PFM_INFO("Unknown PVR_VER(0x%x)\n", PVR_VER(pvr));
11758 + * deconfigure unimplemented registers
11760 + for (i = npmds; i < PFM_PM_NUM_PMDS; i++)
11761 + pfm_ppc32_pmd_desc[i].type = PFM_REG_NA;
11763 + for (i = nmmcr; i < PFM_PM_NUM_PMCS; i++)
11764 + pfm_ppc32_pmc_desc[i].type = PFM_REG_NA;
11767 + * update PMU description structure
11769 + pfm_ppc32_pmu_conf.pmu_name = str;
11770 + pfm_ppc32_pmu_info.pmu_style = pm_type;
11771 + pfm_ppc32_pmu_conf.num_pmc_entries = nmmcr;
11772 + pfm_ppc32_pmu_conf.num_pmd_entries = npmds;
11775 + PFM_INFO("Interrupts unlikely to work\n");
11777 + return reserve_pmc_hardware(perfmon_perf_irq);
11780 +static void pfm_ppc32_write_pmc(unsigned int cnum, u64 value)
11782 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
11784 + mtspr(SPRN_MMCR0, value);
11787 + mtspr(SPRN_MMCR1, value);
11790 + mtspr(SPRN_MMCR2, value);
11797 +static void pfm_ppc32_write_pmd(unsigned int cnum, u64 value)
11799 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11801 + mtspr(SPRN_PMC1, value);
11804 + mtspr(SPRN_PMC2, value);
11807 + mtspr(SPRN_PMC3, value);
11810 + mtspr(SPRN_PMC4, value);
11813 + mtspr(SPRN_PMC5, value);
11816 + mtspr(SPRN_PMC6, value);
11823 +static u64 pfm_ppc32_read_pmd(unsigned int cnum)
11825 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11827 + return mfspr(SPRN_PMC1);
11829 + return mfspr(SPRN_PMC2);
11831 + return mfspr(SPRN_PMC3);
11833 + return mfspr(SPRN_PMC4);
11835 + return mfspr(SPRN_PMC5);
11837 + return mfspr(SPRN_PMC6);
11844 + * pfm_ppc32_enable_counters
11846 + * Just need to load the current values into the control registers.
11848 +static void pfm_ppc32_enable_counters(struct pfm_context *ctx,
11849 + struct pfm_event_set *set)
11851 + unsigned int i, max_pmc;
11853 + max_pmc = pfm_pmu_conf->regs.max_pmc;
11855 + for (i = 0; i < max_pmc; i++)
11856 + if (test_bit(i, set->used_pmcs))
11857 + pfm_ppc32_write_pmc(i, set->pmcs[i]);
11861 + * pfm_ppc32_disable_counters
11863 + * Just need to zero all the control registers.
11865 +static void pfm_ppc32_disable_counters(struct pfm_context *ctx,
11866 + struct pfm_event_set *set)
11868 + unsigned int i, max;
11870 + max = pfm_pmu_conf->regs.max_pmc;
11872 + for (i = 0; i < max; i++)
11873 + if (test_bit(i, set->used_pmcs))
11874 +			pfm_ppc32_write_pmc(i, 0);
11878 + * pfm_ppc32_get_ovfl_pmds
11880 + * Determine which counters in this set have overflowed and fill in the
11881 + * set->povfl_pmds mask and set->npend_ovfls count.
11883 +static void pfm_ppc32_get_ovfl_pmds(struct pfm_context *ctx,
11884 + struct pfm_event_set *set)
11887 + unsigned int max_pmd = pfm_pmu_conf->regs.max_cnt_pmd;
11888 + u64 *used_pmds = set->used_pmds;
11889 + u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds;
11890 +	u64 width_mask = 1ULL << pfm_pmu_conf->counter_width;
11891 + u64 new_val, mask[PFM_PMD_BV];
11893 + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
11894 + cast_ulp(used_pmds), max_pmd);
11896 + for (i = 0; i < max_pmd; i++) {
11897 + if (test_bit(i, mask)) {
11898 + new_val = pfm_ppc32_read_pmd(i);
11899 + if (new_val & width_mask) {
11900 + set_bit(i, set->povfl_pmds);
11901 + set->npend_ovfls++;
11907 +struct pfm_arch_pmu_info pfm_ppc32_pmu_info = {
11908 + .pmu_style = PFM_POWERPC_PMU_NONE,
11909 + .write_pmc = pfm_ppc32_write_pmc,
11910 + .write_pmd = pfm_ppc32_write_pmd,
11911 + .read_pmd = pfm_ppc32_read_pmd,
11912 + .get_ovfl_pmds = pfm_ppc32_get_ovfl_pmds,
11913 + .enable_counters = pfm_ppc32_enable_counters,
11914 + .disable_counters = pfm_ppc32_disable_counters,
11917 +static struct pfm_pmu_config pfm_ppc32_pmu_conf = {
11918 + .counter_width = 31,
11919 + .pmd_desc = pfm_ppc32_pmd_desc,
11920 + .pmc_desc = pfm_ppc32_pmc_desc,
11921 + .probe_pmu = pfm_ppc32_probe_pmu,
11922 + .flags = PFM_PMU_BUILTIN_FLAG,
11923 + .owner = THIS_MODULE,
11924 + .version = "0.1",
11925 + .arch_info = &pfm_ppc32_pmu_info,
11928 +static int __init pfm_ppc32_pmu_init_module(void)
11930 + return pfm_pmu_register(&pfm_ppc32_pmu_conf);
11933 +static void __exit pfm_ppc32_pmu_cleanup_module(void)
11935 + release_pmc_hardware();
11936 + pfm_pmu_unregister(&pfm_ppc32_pmu_conf);
11939 +module_init(pfm_ppc32_pmu_init_module);
11940 +module_exit(pfm_ppc32_pmu_cleanup_module);
11941 diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
11942 index dbc338f..e24320e 100644
11943 --- a/arch/powerpc/platforms/cell/cbe_regs.c
11944 +++ b/arch/powerpc/platforms/cell/cbe_regs.c
11945 @@ -33,6 +33,7 @@ static struct cbe_regs_map
11946 struct cbe_iic_regs __iomem *iic_regs;
11947 struct cbe_mic_tm_regs __iomem *mic_tm_regs;
11948 struct cbe_pmd_shadow_regs pmd_shadow_regs;
11949 + struct cbe_ppe_priv_regs __iomem *ppe_priv_regs;
11950 } cbe_regs_maps[MAX_CBE];
11951 static int cbe_regs_map_count;
11953 @@ -145,6 +146,23 @@ struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
11955 EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
11957 +struct cbe_ppe_priv_regs __iomem *cbe_get_ppe_priv_regs(struct device_node *np)
11959 + struct cbe_regs_map *map = cbe_find_map(np);
11962 + return map->ppe_priv_regs;
11965 +struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu)
11967 + struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
11970 + return map->ppe_priv_regs;
11972 +EXPORT_SYMBOL_GPL(cbe_get_cpu_ppe_priv_regs);
11974 u32 cbe_get_hw_thread_id(int cpu)
11976 return cbe_thread_map[cpu].thread_id;
11977 @@ -206,6 +224,11 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map)
11978 for_each_node_by_type(np, "mic-tm")
11979 if (of_get_parent(np) == be)
11980 map->mic_tm_regs = of_iomap(np, 0);
11982 + for_each_node_by_type(np, "ppe-mmio")
11983 + if (of_get_parent(np) == be)
11984 + map->ppe_priv_regs = of_iomap(np, 0);
11987 struct device_node *cpu;
11988 /* That hack must die die die ! */
11989 @@ -227,6 +250,10 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map)
11990 prop = of_get_property(cpu, "mic-tm", NULL);
11992 map->mic_tm_regs = ioremap(prop->address, prop->len);
11994 + prop = of_get_property(cpu, "ppe-mmio", NULL);
11995 + if (prop != NULL)
11996 + map->ppe_priv_regs = ioremap(prop->address, prop->len);
12000 diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
12001 index 109ae24..bafe5a6 100644
12002 --- a/arch/sparc/include/asm/hypervisor.h
12003 +++ b/arch/sparc/include/asm/hypervisor.h
12004 @@ -2713,6 +2713,30 @@ extern unsigned long sun4v_ldc_revoke(unsigned long channel,
12006 #define HV_FAST_SET_PERFREG 0x101
12008 +#define HV_N2_PERF_SPARC_CTL 0x0
12009 +#define HV_N2_PERF_DRAM_CTL0 0x1
12010 +#define HV_N2_PERF_DRAM_CNT0 0x2
12011 +#define HV_N2_PERF_DRAM_CTL1 0x3
12012 +#define HV_N2_PERF_DRAM_CNT1 0x4
12013 +#define HV_N2_PERF_DRAM_CTL2 0x5
12014 +#define HV_N2_PERF_DRAM_CNT2 0x6
12015 +#define HV_N2_PERF_DRAM_CTL3 0x7
12016 +#define HV_N2_PERF_DRAM_CNT3 0x8
12018 +#define HV_FAST_N2_GET_PERFREG 0x104
12019 +#define HV_FAST_N2_SET_PERFREG 0x105
12021 +#ifndef __ASSEMBLY__
12022 +extern unsigned long sun4v_niagara_getperf(unsigned long reg,
12023 + unsigned long *val);
12024 +extern unsigned long sun4v_niagara_setperf(unsigned long reg,
12025 + unsigned long val);
12026 +extern unsigned long sun4v_niagara2_getperf(unsigned long reg,
12027 + unsigned long *val);
12028 +extern unsigned long sun4v_niagara2_setperf(unsigned long reg,
12029 + unsigned long val);
12032 /* MMU statistics services.
12034 * The hypervisor maintains MMU statistics and privileged code provides
12035 diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
12036 index e3dd930..6cf3aec 100644
12037 --- a/arch/sparc/include/asm/irq_64.h
12038 +++ b/arch/sparc/include/asm/irq_64.h
12039 @@ -67,6 +67,9 @@ extern void virt_irq_free(unsigned int virt_irq);
12040 extern void __init init_IRQ(void);
12041 extern void fixup_irqs(void);
12043 +extern int register_perfctr_intr(void (*handler)(struct pt_regs *));
12044 +extern void release_perfctr_intr(void (*handler)(struct pt_regs *));
12046 static inline void set_softint(unsigned long bits)
12048 __asm__ __volatile__("wr %0, 0x0, %%set_softint"
12049 diff --git a/arch/sparc/include/asm/perfmon.h b/arch/sparc/include/asm/perfmon.h
12050 new file mode 100644
12051 index 0000000..f20cbfa
12053 +++ b/arch/sparc/include/asm/perfmon.h
12055 +#ifndef _SPARC64_PERFMON_H_
12056 +#define _SPARC64_PERFMON_H_
12059 + * arch-specific user visible interface definitions
12062 +#define PFM_ARCH_MAX_PMCS 2
12063 +#define PFM_ARCH_MAX_PMDS 3
12065 +#endif /* _SPARC64_PERFMON_H_ */
12066 diff --git a/arch/sparc/include/asm/perfmon_kern.h b/arch/sparc/include/asm/perfmon_kern.h
12067 new file mode 100644
12068 index 0000000..033eff5
12070 +++ b/arch/sparc/include/asm/perfmon_kern.h
12072 +#ifndef _SPARC64_PERFMON_KERN_H_
12073 +#define _SPARC64_PERFMON_KERN_H_
12077 +#ifdef CONFIG_PERFMON
12079 +#include <linux/irq.h>
12080 +#include <asm/system.h>
12082 +#define PFM_ARCH_PMD_STK_ARG 2
12083 +#define PFM_ARCH_PMC_STK_ARG 1
12085 +struct pfm_arch_pmu_info {
12089 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
12093 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
12094 + struct pfm_event_set *set)
12097 +static inline void pfm_arch_serialize(void)
12102 + * SPARC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
12103 + * this routine needs to do it when switching sets on overflow
12105 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
12106 + struct pfm_event_set *set)
12108 + pfm_save_pmds(ctx, set);
12111 +extern void pfm_arch_write_pmc(struct pfm_context *ctx,
12112 + unsigned int cnum, u64 value);
12113 +extern u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum);
12115 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
12116 + unsigned int cnum, u64 value)
12120 + value &= pfm_pmu_conf->ovfl_mask;
12126 + pic = (pic & 0xffffffff00000000UL) |
12127 + (value & 0xffffffffUL);
12130 + pic = (pic & 0xffffffffUL) |
12140 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx,
12141 + unsigned int cnum)
12149 + return pic & 0xffffffffUL;
12151 + return pic >> 32UL;
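+	/*
+	 * The UltraSPARC %pic register packs two 32-bit counters, PIC0 in
+	 * the low 32 bits and PIC1 in the upper 32 bits, which is why
+	 * pfm_arch_read_pmd() and pfm_arch_write_pmd() split and merge the
+	 * value with 32-bit masks and shifts.
+	 */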
12159 + * For some CPUs, the upper bits of a counter must be set in order for the
12160 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
12161 + * and the upper bits are cleared. This function may be used to set them back.
12163 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
12164 + unsigned int cnum)
12166 + u64 val = pfm_arch_read_pmd(ctx, cnum);
12168 + /* This masks out overflow bit 31 */
12169 + pfm_arch_write_pmd(ctx, cnum, val);
12173 + * At certain points, perfmon needs to know if monitoring has been
12174 + * explicitly started/stopped by the user via pfm_start/pfm_stop. The
12175 + * information is tracked in ctx.flags.started. However, on certain
12176 + * architectures, it may be possible to start/stop directly from
12177 + * user level with a single assembly instruction, bypassing
12178 + * the kernel. This function must be used to determine, by
12179 + * arch-specific means, whether monitoring is actually started/stopped.
12181 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
12183 + return ctx->flags.started;
12186 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
12187 + struct pfm_context *ctx)
12191 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
12192 + struct pfm_context *ctx)
12196 +static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
12197 + struct pfm_context *ctx)
12201 +int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
12202 +int pfm_arch_ctxswout_thread(struct task_struct *task,
12203 + struct pfm_context *ctx);
12204 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
12205 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
12206 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
12207 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
12208 +char *pfm_arch_get_pmu_module_name(void);
12210 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
12211 + struct pfm_event_set *set)
12213 + pfm_arch_stop(current, ctx);
12215 + * we mark monitoring as stopped to avoid
12216 +	 * certain side effects, especially when
12217 +	 * pfm_switch_sets_from_intr() calls
12218 + * pfm_arch_restore_pmcs()
12220 + ctx->flags.started = 0;
12224 + * unfreeze PMU from pfm_do_interrupt_handler()
12225 + * ctx may be NULL for spurious
12227 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
12232 + PFM_DBG_ovfl("state=%d", ctx->state);
12234 + ctx->flags.started = 1;
12236 + if (ctx->state == PFM_CTX_MASKED)
12239 + pfm_arch_restore_pmcs(ctx, ctx->active_set);
12243 + * this function is called from the PMU interrupt handler ONLY.
12244 + * On SPARC, the PMU is frozen via arch_stop; masking would be implemented
12245 + * via arch_stop as well. Given that the PMU is already stopped when
12246 + * entering the interrupt handler, we do not need to stop it again, so
12247 + * this function is a nop.
12249 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
12250 + struct pfm_event_set *set)
12255 + * on SPARC, masking/unmasking uses the start/stop mechanism, so we simply
12256 + * need to start here.
12258 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
12259 + struct pfm_event_set *set)
12261 + pfm_arch_start(current, ctx);
12264 +static inline void pfm_arch_pmu_config_remove(void)
12268 +static inline int pfm_arch_context_create(struct pfm_context *ctx,
12274 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
12279 + * function called from pfm_setfl_sane(). Context is locked
12280 + * and interrupts are masked.
12281 + * The value of flags is the value of ctx_flags as passed by
12284 + * function must check arch-specific set flags.
12286 + * 1 when flags are valid
12289 +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
12294 +static inline int pfm_arch_init(void)
12299 +static inline void pfm_arch_init_percpu(void)
12303 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
12308 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
12311 +extern void perfmon_interrupt(struct pt_regs *);
12313 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
12315 + return register_perfctr_intr(perfmon_interrupt);
12318 +static inline void pfm_arch_pmu_release(void)
12320 + release_perfctr_intr(perfmon_interrupt);
12323 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
12326 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
12329 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
12334 +static inline int pfm_arch_get_base_syscall(void)
12336 + return __NR_pfm_create_context;
12339 +struct pfm_arch_context {
12343 +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
12345 + * SPARC needs extra alignment for the sampling buffer
12347 +#define PFM_ARCH_SMPL_ALIGN_SIZE (16 * 1024)
12349 +static inline void pfm_cacheflush(void *addr, unsigned int len)
12353 +#endif /* CONFIG_PERFMON */
12355 +#endif /* __KERNEL__ */
12357 +#endif /* _SPARC64_PERFMON_KERN_H_ */
12358 diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
12359 index db9e742..2a9ddb9 100644
12360 --- a/arch/sparc/include/asm/system_64.h
12361 +++ b/arch/sparc/include/asm/system_64.h
12362 @@ -30,6 +30,9 @@ enum sparc_cpu {
12363 #define ARCH_SUN4C_SUN4 0
12364 #define ARCH_SUN4 0
12366 +extern char *sparc_cpu_type;
12367 +extern char *sparc_fpu_type;
12368 +extern char *sparc_pmu_type;
12369 extern char reboot_command[];
12371 /* These are here in an effort to more fully work around Spitfire Errata
12372 @@ -104,15 +107,13 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
12373 #define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p))
12374 #define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p))
12376 -/* Blackbird errata workaround. See commentary in
12377 - * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
12378 - * for more information.
12380 -#define reset_pic() \
12381 - __asm__ __volatile__("ba,pt %xcc, 99f\n\t" \
12382 +/* Blackbird errata workaround. */
12383 +#define write_pic(val) \
12384 + __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \
12386 - "99:wr %g0, 0x0, %pic\n\t" \
12388 + "99:wr %0, 0x0, %%pic\n\t" \
12389 + "rd %%pic, %%g0" : : "r" (val))
12390 +#define reset_pic() write_pic(0)
12392 #ifndef __ASSEMBLY__
12394 @@ -145,14 +146,10 @@ do { \
12395 * and 2 stores in this critical code path. -DaveM
12397 #define switch_to(prev, next, last) \
12398 -do { if (test_thread_flag(TIF_PERFCTR)) { \
12399 - unsigned long __tmp; \
12400 - read_pcr(__tmp); \
12401 - current_thread_info()->pcr_reg = __tmp; \
12402 - read_pic(__tmp); \
12403 - current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\
12404 - current_thread_info()->kernel_cntd1 += ((__tmp) >> 32); \
12406 +do { if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
12407 + pfm_ctxsw_out(prev, next); \
12408 + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
12409 + pfm_ctxsw_in(prev, next); \
12410 flush_tlb_pending(); \
12411 save_and_clear_fpu(); \
12412 /* If you are tempted to conditionalize the following */ \
12413 @@ -197,11 +194,6 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
12414 "l1", "l2", "l3", "l4", "l5", "l6", "l7", \
12415 "i0", "i1", "i2", "i3", "i4", "i5", \
12416 "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \
12417 - /* If you fuck with this, update ret_from_syscall code too. */ \
12418 - if (test_thread_flag(TIF_PERFCTR)) { \
12419 - write_pcr(current_thread_info()->pcr_reg); \
12424 static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
12425 diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
12426 index c0a737d..53857f7 100644
12427 --- a/arch/sparc/include/asm/thread_info_64.h
12428 +++ b/arch/sparc/include/asm/thread_info_64.h
12429 @@ -58,11 +58,6 @@ struct thread_info {
12430 unsigned long gsr[7];
12431 unsigned long xfsr[7];
12433 - __u64 __user *user_cntd0;
12434 - __u64 __user *user_cntd1;
12435 - __u64 kernel_cntd0, kernel_cntd1;
12438 struct restart_block restart_block;
12440 struct pt_regs *kern_una_regs;
12441 @@ -96,15 +91,10 @@ struct thread_info {
12442 #define TI_RWIN_SPTRS 0x000003c8
12443 #define TI_GSR 0x00000400
12444 #define TI_XFSR 0x00000438
12445 -#define TI_USER_CNTD0 0x00000470
12446 -#define TI_USER_CNTD1 0x00000478
12447 -#define TI_KERN_CNTD0 0x00000480
12448 -#define TI_KERN_CNTD1 0x00000488
12449 -#define TI_PCR 0x00000490
12450 -#define TI_RESTART_BLOCK 0x00000498
12451 -#define TI_KUNA_REGS 0x000004c0
12452 -#define TI_KUNA_INSN 0x000004c8
12453 -#define TI_FPREGS 0x00000500
12454 +#define TI_RESTART_BLOCK 0x00000470
12455 +#define TI_KUNA_REGS 0x00000498
12456 +#define TI_KUNA_INSN 0x000004a0
12457 +#define TI_FPREGS 0x000004c0
12459 /* We embed this in the uppermost byte of thread_info->flags */
12460 #define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */
12461 @@ -222,11 +212,11 @@ register struct thread_info *current_thread_info_reg asm("g6");
12462 #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
12463 #define TIF_SIGPENDING 2 /* signal pending */
12464 #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
12465 -#define TIF_PERFCTR 4 /* performance counters active */
12466 +/* Bit 4 is available */
12467 #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */
12468 /* flag bit 6 is available */
12469 #define TIF_32BIT 7 /* 32-bit binary */
12470 -/* flag bit 8 is available */
12471 +#define TIF_PERFMON_WORK 8 /* work for pfm_handle_work() */
12472 #define TIF_SECCOMP 9 /* secure computing */
12473 #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */
12474 /* flag bit 11 is available */
12475 @@ -237,22 +227,24 @@ register struct thread_info *current_thread_info_reg asm("g6");
12476 #define TIF_ABI_PENDING 12
12477 #define TIF_MEMDIE 13
12478 #define TIF_POLLING_NRFLAG 14
12479 +#define TIF_PERFMON_CTXSW 15 /* perfmon needs ctxsw calls */
12481 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
12482 #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
12483 #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
12484 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
12485 -#define _TIF_PERFCTR (1<<TIF_PERFCTR)
12486 #define _TIF_UNALIGNED (1<<TIF_UNALIGNED)
12487 #define _TIF_32BIT (1<<TIF_32BIT)
12488 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
12489 #define _TIF_SECCOMP (1<<TIF_SECCOMP)
12490 #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
12491 #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
12492 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
12493 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
12495 #define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
12496 _TIF_DO_NOTIFY_RESUME_MASK | \
12497 - _TIF_NEED_RESCHED | _TIF_PERFCTR)
12498 + _TIF_NEED_RESCHED)
12499 #define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING)
12502 diff --git a/arch/sparc/include/asm/unistd_32.h b/arch/sparc/include/asm/unistd_32.h
12503 index 648643a..efe4d86 100644
12504 --- a/arch/sparc/include/asm/unistd_32.h
12505 +++ b/arch/sparc/include/asm/unistd_32.h
12506 @@ -338,8 +338,20 @@
12507 #define __NR_dup3 320
12508 #define __NR_pipe2 321
12509 #define __NR_inotify_init1 322
12510 +#define __NR_pfm_create_context 323
12511 +#define __NR_pfm_write_pmcs 324
12512 +#define __NR_pfm_write_pmds 325
12513 +#define __NR_pfm_read_pmds 326
12514 +#define __NR_pfm_load_context 327
12515 +#define __NR_pfm_start 328
12516 +#define __NR_pfm_stop 329
12517 +#define __NR_pfm_restart 330
12518 +#define __NR_pfm_create_evtsets 331
12519 +#define __NR_pfm_getinfo_evtsets 332
12520 +#define __NR_pfm_delete_evtsets 333
12521 +#define __NR_pfm_unload_context 334
12523 -#define NR_SYSCALLS 323
12524 +#define NR_SYSCALLS		335
12526 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
12527 * it never had the plain ones and there is no value to adding those
12528 diff --git a/arch/sparc/include/asm/unistd_64.h b/arch/sparc/include/asm/unistd_64.h
12529 index c5cc0e0..cbbb0b5 100644
12530 --- a/arch/sparc/include/asm/unistd_64.h
12531 +++ b/arch/sparc/include/asm/unistd_64.h
12532 @@ -340,8 +340,20 @@
12533 #define __NR_dup3 320
12534 #define __NR_pipe2 321
12535 #define __NR_inotify_init1 322
12536 +#define __NR_pfm_create_context 323
12537 +#define __NR_pfm_write_pmcs 324
12538 +#define __NR_pfm_write_pmds 325
12539 +#define __NR_pfm_read_pmds 326
12540 +#define __NR_pfm_load_context 327
12541 +#define __NR_pfm_start 328
12542 +#define __NR_pfm_stop 329
12543 +#define __NR_pfm_restart 330
12544 +#define __NR_pfm_create_evtsets 331
12545 +#define __NR_pfm_getinfo_evtsets 332
12546 +#define __NR_pfm_delete_evtsets 333
12547 +#define __NR_pfm_unload_context 334
12549 -#define NR_SYSCALLS 323
12550 +#define NR_SYSCALLS 335
12553 #define __ARCH_WANT_IPC_PARSE_VERSION
12554 diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
12555 index e1b9233..727e4e7 100644
12556 --- a/arch/sparc/kernel/systbls.S
12557 +++ b/arch/sparc/kernel/systbls.S
12558 @@ -81,4 +81,6 @@ sys_call_table:
12559 /*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
12560 /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
12561 /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
12562 -/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1
12563 +/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
12564 +/*325*/ .long sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
12565 +/*330*/ .long sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
12566 diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
12567 index 36b4b7a..5555d1e 100644
12568 --- a/arch/sparc64/Kconfig
12569 +++ b/arch/sparc64/Kconfig
12570 @@ -401,6 +401,8 @@ source "drivers/sbus/char/Kconfig"
12572 source "fs/Kconfig"
12574 +source "arch/sparc64/perfmon/Kconfig"
12576 source "arch/sparc64/Kconfig.debug"
12578 source "security/Kconfig"
12579 diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile
12580 index b785a39..646731c 100644
12581 --- a/arch/sparc64/Makefile
12582 +++ b/arch/sparc64/Makefile
12583 @@ -32,6 +32,8 @@ core-y += arch/sparc64/math-emu/
12584 libs-y += arch/sparc64/prom/ arch/sparc64/lib/
12585 drivers-$(CONFIG_OPROFILE) += arch/sparc64/oprofile/
12587 +core-$(CONFIG_PERFMON) += arch/sparc64/perfmon/
12589 boot := arch/sparc64/boot
12591 image tftpboot.img vmlinux.aout: vmlinux
12592 diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c
12593 index 0097c08..f839f84 100644
12594 --- a/arch/sparc64/kernel/cpu.c
12595 +++ b/arch/sparc64/kernel/cpu.c
12596 @@ -20,16 +20,17 @@
12597 DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
12599 struct cpu_iu_info {
12602 - char* cpu_name; /* should be enough I hope... */
12609 struct cpu_fp_info {
12620 static struct cpu_fp_info linux_sparc_fpu[] = {
12621 @@ -49,23 +50,24 @@ static struct cpu_fp_info linux_sparc_fpu[] = {
12622 #define NSPARCFPU ARRAY_SIZE(linux_sparc_fpu)
12624 static struct cpu_iu_info linux_sparc_chips[] = {
12625 - { 0x17, 0x10, "TI UltraSparc I (SpitFire)"},
12626 - { 0x22, 0x10, "TI UltraSparc I (SpitFire)"},
12627 - { 0x17, 0x11, "TI UltraSparc II (BlackBird)"},
12628 - { 0x17, 0x12, "TI UltraSparc IIi (Sabre)"},
12629 - { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)"},
12630 - { 0x3e, 0x14, "TI UltraSparc III (Cheetah)"},
12631 - { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)"},
12632 - { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)"},
12633 - { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)"},
12634 - { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)"},
12635 - { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)"},
12637 + { 0x17, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"},
12638 + { 0x22, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"},
12639 + { 0x17, 0x11, "TI UltraSparc II (BlackBird)", "ultra12"},
12640 + { 0x17, 0x12, "TI UltraSparc IIi (Sabre)", "ultra12"},
12641 + { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"},
12642 + { 0x3e, 0x14, "TI UltraSparc III (Cheetah)", "ultra3"},
12643 + { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"},
12644 + { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"},
12645 + { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)", "ultra4"},
12646 + { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"},
12647 + { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3+"},
12650 #define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
12652 char *sparc_cpu_type;
12653 char *sparc_fpu_type;
12654 +char *sparc_pmu_type;
12656 static void __init sun4v_cpu_probe(void)
12658 @@ -73,11 +75,13 @@ static void __init sun4v_cpu_probe(void)
12659 case SUN4V_CHIP_NIAGARA1:
12660 sparc_cpu_type = "UltraSparc T1 (Niagara)";
12661 sparc_fpu_type = "UltraSparc T1 integrated FPU";
12662 + sparc_pmu_type = "niagara";
12665 case SUN4V_CHIP_NIAGARA2:
12666 sparc_cpu_type = "UltraSparc T2 (Niagara2)";
12667 sparc_fpu_type = "UltraSparc T2 integrated FPU";
12668 + sparc_pmu_type = "niagara2";
12672 @@ -85,6 +89,7 @@ static void __init sun4v_cpu_probe(void)
12673 prom_cpu_compatible);
12674 sparc_cpu_type = "Unknown SUN4V CPU";
12675 sparc_fpu_type = "Unknown SUN4V FPU";
12676 + sparc_pmu_type = "Unknown SUN4V PMU";
12680 @@ -117,6 +122,8 @@ retry:
12681 if (linux_sparc_chips[i].impl == impl) {
12683 linux_sparc_chips[i].cpu_name;
12685 + linux_sparc_chips[i].pmu_name;
12689 @@ -134,7 +141,7 @@ retry:
12690 printk("DEBUG: manuf[%lx] impl[%lx]\n",
12693 - sparc_cpu_type = "Unknown CPU";
12694 + sparc_pmu_type = "Unknown PMU";
12697 for (i = 0; i < NSPARCFPU; i++) {
12698 diff --git a/arch/sparc64/kernel/hvcalls.S b/arch/sparc64/kernel/hvcalls.S
12699 index a2810f3..b9f508c 100644
12700 --- a/arch/sparc64/kernel/hvcalls.S
12701 +++ b/arch/sparc64/kernel/hvcalls.S
12702 @@ -884,3 +884,44 @@ sun4v_mmu_demap_all:
12705 .size sun4v_mmu_demap_all, .-sun4v_mmu_demap_all
12707 + .globl sun4v_niagara_getperf
12708 + .type sun4v_niagara_getperf,#function
12709 +sun4v_niagara_getperf:
12711 + mov HV_FAST_GET_PERFREG, %o5
12716 + .size sun4v_niagara_getperf, .-sun4v_niagara_getperf
12718 + .globl sun4v_niagara_setperf
12719 + .type sun4v_niagara_setperf,#function
12720 +sun4v_niagara_setperf:
12721 + mov HV_FAST_SET_PERFREG, %o5
12725 + .size sun4v_niagara_setperf, .-sun4v_niagara_setperf
12727 + .globl sun4v_niagara2_getperf
12728 + .type sun4v_niagara2_getperf,#function
12729 +sun4v_niagara2_getperf:
12731 + mov HV_FAST_N2_GET_PERFREG, %o5
12736 + .size sun4v_niagara2_getperf, .-sun4v_niagara2_getperf
12738 + .globl sun4v_niagara2_setperf
12739 + .type sun4v_niagara2_setperf,#function
12740 +sun4v_niagara2_setperf:
12741 + mov HV_FAST_N2_SET_PERFREG, %o5
12745 + .size sun4v_niagara2_setperf, .-sun4v_niagara2_setperf
12747 diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
12748 index 7495bc7..e2bcca5 100644
12749 --- a/arch/sparc64/kernel/irq.c
12750 +++ b/arch/sparc64/kernel/irq.c
12751 @@ -749,6 +749,20 @@ void handler_irq(int irq, struct pt_regs *regs)
12753 set_irq_regs(old_regs);
12755 +static void unhandled_perf_irq(struct pt_regs *regs)
12757 + unsigned long pcr, pic;
12764 + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
12765 + smp_processor_id());
12766 + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
12767 + smp_processor_id(), pcr, pic);
12770 void do_softirq(void)
12772 @@ -776,6 +790,55 @@ void do_softirq(void)
12773 local_irq_restore(flags);
12776 +/* Almost a direct copy of the powerpc PMC code. */
12777 +static DEFINE_SPINLOCK(perf_irq_lock);
12778 +static void *perf_irq_owner_caller; /* mostly for debugging */
12779 +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
12781 +/* Invoked from level 15 PIL handler in trap table. */
12782 +void perfctr_irq(int irq, struct pt_regs *regs)
12784 + clear_softint(1 << irq);
12788 +int register_perfctr_intr(void (*handler)(struct pt_regs *))
12795 + spin_lock(&perf_irq_lock);
12796 + if (perf_irq != unhandled_perf_irq) {
12797 + printk(KERN_WARNING "register_perfctr_intr: "
12798 + "perf IRQ busy (reserved by caller %p)\n",
12799 + perf_irq_owner_caller);
12804 + perf_irq_owner_caller = __builtin_return_address(0);
12805 + perf_irq = handler;
12809 + spin_unlock(&perf_irq_lock);
12813 +EXPORT_SYMBOL_GPL(register_perfctr_intr);
12815 +void release_perfctr_intr(void (*handler)(struct pt_regs *))
12817 + spin_lock(&perf_irq_lock);
12818 + perf_irq_owner_caller = NULL;
12819 + perf_irq = unhandled_perf_irq;
12820 + spin_unlock(&perf_irq_lock);
12822 +EXPORT_SYMBOL_GPL(release_perfctr_intr);
12825 #ifdef CONFIG_HOTPLUG_CPU
12826 void fixup_irqs(void)
12828 diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
12829 index 15f4178..7282d21 100644
12830 --- a/arch/sparc64/kernel/process.c
12831 +++ b/arch/sparc64/kernel/process.c
12833 #include <linux/cpu.h>
12834 #include <linux/elfcore.h>
12835 #include <linux/sysrq.h>
12836 +#include <linux/perfmon_kern.h>
12838 #include <asm/oplib.h>
12839 #include <asm/uaccess.h>
12840 @@ -385,11 +386,7 @@ void exit_thread(void)
12844 - if (test_and_clear_thread_flag(TIF_PERFCTR)) {
12845 - t->user_cntd0 = t->user_cntd1 = NULL;
12849 + pfm_exit_thread();
12852 void flush_thread(void)
12853 @@ -411,13 +408,6 @@ void flush_thread(void)
12855 set_thread_wsaved(0);
12857 - /* Turn off performance counters if on. */
12858 - if (test_and_clear_thread_flag(TIF_PERFCTR)) {
12859 - t->user_cntd0 = t->user_cntd1 = NULL;
12864 /* Clear FPU register state. */
12867 @@ -631,16 +621,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
12868 t->kregs->u_regs[UREG_FP] =
12869 ((unsigned long) child_sf) - STACK_BIAS;
12871 - /* Special case, if we are spawning a kernel thread from
12872 - * a userspace task (usermode helper, NFS or similar), we
12873 - * must disable performance counters in the child because
12874 - * the address space and protection realm are changing.
12876 - if (t->flags & _TIF_PERFCTR) {
12877 - t->user_cntd0 = t->user_cntd1 = NULL;
12879 - t->flags &= ~_TIF_PERFCTR;
12881 t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
12882 t->kregs->u_regs[UREG_G6] = (unsigned long) t;
12883 t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
12884 @@ -673,6 +653,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
12885 if (clone_flags & CLONE_SETTLS)
12886 t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
12888 + pfm_copy_thread(p);
12893 diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
12894 index 97a993c..c2af29d 100644
12895 --- a/arch/sparc64/kernel/rtrap.S
12896 +++ b/arch/sparc64/kernel/rtrap.S
12897 @@ -65,55 +65,14 @@ __handle_user_windows:
12898 ba,pt %xcc, __handle_user_windows_continue
12901 -__handle_perfctrs:
12902 - call update_perfctrs
12903 - wrpr %g0, RTRAP_PSTATE, %pstate
12904 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12905 - ldub [%g6 + TI_WSAVED], %o2
12908 - /* Redo userwin+sched+sig checks */
12909 - call fault_in_user_windows
12911 - wrpr %g0, RTRAP_PSTATE, %pstate
12912 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12913 - ldx [%g6 + TI_FLAGS], %l0
12914 - andcc %l0, _TIF_NEED_RESCHED, %g0
12919 - wrpr %g0, RTRAP_PSTATE, %pstate
12920 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12921 - ldx [%g6 + TI_FLAGS], %l0
12922 -1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
12924 - be,pt %xcc, __handle_perfctrs_continue
12925 - sethi %hi(TSTATE_PEF), %o0
12927 - add %sp, PTREGS_OFF, %o0
12929 - call do_notify_resume
12931 - wrpr %g0, RTRAP_PSTATE, %pstate
12932 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12933 - /* Signal delivery can modify pt_regs tstate, so we must
12936 - ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
12937 - sethi %hi(0xf << 20), %l4
12938 - and %l1, %l4, %l4
12939 - andn %l1, %l4, %l1
12940 - ba,pt %xcc, __handle_perfctrs_continue
12942 - sethi %hi(TSTATE_PEF), %o0
12945 andcc %l5, FPRS_FEF, %g0
12946 sethi %hi(TSTATE_PEF), %o0
12947 be,a,pn %icc, __handle_userfpu_continue
12949 - ba,a,pt %xcc, __handle_userfpu_continue
12950 + ba,pt %xcc, __handle_userfpu_continue
12955 @@ -202,12 +161,8 @@ __handle_signal_continue:
12956 brnz,pn %o2, __handle_user_windows
12958 __handle_user_windows_continue:
12959 - ldx [%g6 + TI_FLAGS], %l5
12960 - andcc %l5, _TIF_PERFCTR, %g0
12961 sethi %hi(TSTATE_PEF), %o0
12962 - bne,pn %xcc, __handle_perfctrs
12963 -__handle_perfctrs_continue:
12964 - andcc %l1, %o0, %g0
12965 + andcc %l1, %o0, %g0
12967 /* This fpdepth clear is necessary for non-syscall rtraps only */
12969 diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
12970 index c8b03a4..248aa1f 100644
12971 --- a/arch/sparc64/kernel/setup.c
12972 +++ b/arch/sparc64/kernel/setup.c
12973 @@ -352,6 +352,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
12980 "ncpus probed\t: %d\n"
12981 @@ -364,6 +365,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
12987 ((tlb_type == hypervisor) ?
12989 diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
12990 index ec82d76..cea1082 100644
12991 --- a/arch/sparc64/kernel/signal.c
12992 +++ b/arch/sparc64/kernel/signal.c
12994 #include <linux/tty.h>
12995 #include <linux/binfmts.h>
12996 #include <linux/bitops.h>
12997 +#include <linux/perfmon_kern.h>
12999 #include <asm/uaccess.h>
13000 #include <asm/ptrace.h>
13001 @@ -608,6 +609,9 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
13003 void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
13005 + if (thread_info_flags & _TIF_PERFMON_WORK)
13006 + pfm_handle_work(regs);
13008 if (thread_info_flags & _TIF_SIGPENDING)
13009 do_signal(regs, orig_i0);
13010 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
13011 diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
13012 index 39749e3..384004b 100644
13013 --- a/arch/sparc64/kernel/sys_sparc.c
13014 +++ b/arch/sparc64/kernel/sys_sparc.c
13017 #include <asm/uaccess.h>
13018 #include <asm/utrap.h>
13019 -#include <asm/perfctr.h>
13020 #include <asm/unistd.h>
13023 @@ -791,106 +790,10 @@ asmlinkage long sys_rt_sigaction(int sig,
13027 -/* Invoked by rtrap code to update performance counters in
13030 -asmlinkage void update_perfctrs(void)
13032 - unsigned long pic, tmp;
13035 - tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
13036 - __put_user(tmp, current_thread_info()->user_cntd0);
13037 - tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
13038 - __put_user(tmp, current_thread_info()->user_cntd1);
13042 asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1, unsigned long arg2)
13048 - current_thread_info()->pcr_reg = arg2;
13049 - current_thread_info()->user_cntd0 = (u64 __user *) arg0;
13050 - current_thread_info()->user_cntd1 = (u64 __user *) arg1;
13051 - current_thread_info()->kernel_cntd0 =
13052 - current_thread_info()->kernel_cntd1 = 0;
13055 - set_thread_flag(TIF_PERFCTR);
13058 - case PERFCTR_OFF:
13060 - if (test_thread_flag(TIF_PERFCTR)) {
13061 - current_thread_info()->user_cntd0 =
13062 - current_thread_info()->user_cntd1 = NULL;
13063 - current_thread_info()->pcr_reg = 0;
13065 - clear_thread_flag(TIF_PERFCTR);
13070 - case PERFCTR_READ: {
13071 - unsigned long pic, tmp;
13073 - if (!test_thread_flag(TIF_PERFCTR)) {
13078 - tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
13079 - err |= __put_user(tmp, current_thread_info()->user_cntd0);
13080 - tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
13081 - err |= __put_user(tmp, current_thread_info()->user_cntd1);
13086 - case PERFCTR_CLRPIC:
13087 - if (!test_thread_flag(TIF_PERFCTR)) {
13091 - current_thread_info()->kernel_cntd0 =
13092 - current_thread_info()->kernel_cntd1 = 0;
13096 - case PERFCTR_SETPCR: {
13097 - u64 __user *user_pcr = (u64 __user *)arg0;
13099 - if (!test_thread_flag(TIF_PERFCTR)) {
13103 - err |= __get_user(current_thread_info()->pcr_reg, user_pcr);
13104 - write_pcr(current_thread_info()->pcr_reg);
13105 - current_thread_info()->kernel_cntd0 =
13106 - current_thread_info()->kernel_cntd1 = 0;
13111 - case PERFCTR_GETPCR: {
13112 - u64 __user *user_pcr = (u64 __user *)arg0;
13114 - if (!test_thread_flag(TIF_PERFCTR)) {
13118 - err |= __put_user(current_thread_info()->pcr_reg, user_pcr);
13127 +	/* Superseded by perfmon2 */
13132 diff --git a/arch/sparc64/kernel/syscalls.S b/arch/sparc64/kernel/syscalls.S
13133 index a2f2427..b20bf1e 100644
13134 --- a/arch/sparc64/kernel/syscalls.S
13135 +++ b/arch/sparc64/kernel/syscalls.S
13136 @@ -117,26 +117,9 @@ ret_from_syscall:
13137 stb %g0, [%g6 + TI_NEW_CHILD]
13138 ldx [%g6 + TI_FLAGS], %l0
13141 - andcc %l0, _TIF_PERFCTR, %g0
13144 - ldx [%g6 + TI_PCR], %o7
13145 - wr %g0, %o7, %pcr
13147 - /* Blackbird errata workaround. See commentary in
13148 - * smp.c:smp_percpu_timer_interrupt() for more
13155 -99: wr %g0, %g0, %pic
13158 -1: ba,pt %xcc, ret_sys_call
13159 - ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
13161 + ba,pt %xcc, ret_sys_call
13162 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
13165 .type sparc_exit,#function
13166 diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
13167 index 0fdbf3b..1a1a296 100644
13168 --- a/arch/sparc64/kernel/systbls.S
13169 +++ b/arch/sparc64/kernel/systbls.S
13170 @@ -82,7 +82,9 @@ sys_call_table32:
13171 .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
13172 /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
13173 .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
13174 -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
13175 +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
13176 + .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
13177 +/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
13179 #endif /* CONFIG_COMPAT */
13181 @@ -156,4 +158,6 @@ sys_call_table:
13182 .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
13183 /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
13184 .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
13185 -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
13186 +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
13187 + .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
13188 +/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
13189 diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
13190 index c824df1..be45d09 100644
13191 --- a/arch/sparc64/kernel/traps.c
13192 +++ b/arch/sparc64/kernel/traps.c
13193 @@ -2470,86 +2470,90 @@ extern void tsb_config_offsets_are_bolixed_dave(void);
13194 /* Only invoked on boot processor. */
13195 void __init trap_init(void)
13197 - /* Compile time sanity check. */
13198 - if (TI_TASK != offsetof(struct thread_info, task) ||
13199 - TI_FLAGS != offsetof(struct thread_info, flags) ||
13200 - TI_CPU != offsetof(struct thread_info, cpu) ||
13201 - TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
13202 - TI_KSP != offsetof(struct thread_info, ksp) ||
13203 - TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) ||
13204 - TI_KREGS != offsetof(struct thread_info, kregs) ||
13205 - TI_UTRAPS != offsetof(struct thread_info, utraps) ||
13206 - TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) ||
13207 - TI_REG_WINDOW != offsetof(struct thread_info, reg_window) ||
13208 - TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) ||
13209 - TI_GSR != offsetof(struct thread_info, gsr) ||
13210 - TI_XFSR != offsetof(struct thread_info, xfsr) ||
13211 - TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) ||
13212 - TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) ||
13213 - TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
13214 - TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
13215 - TI_PCR != offsetof(struct thread_info, pcr_reg) ||
13216 - TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
13217 - TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
13218 - TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
13219 - TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
13220 - TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
13221 - TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
13222 - TI_FPREGS != offsetof(struct thread_info, fpregs) ||
13223 - (TI_FPREGS & (64 - 1)))
13224 - thread_info_offsets_are_bolixed_dave();
13226 - if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
13227 - (TRAP_PER_CPU_PGD_PADDR !=
13228 - offsetof(struct trap_per_cpu, pgd_paddr)) ||
13229 - (TRAP_PER_CPU_CPU_MONDO_PA !=
13230 - offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
13231 - (TRAP_PER_CPU_DEV_MONDO_PA !=
13232 - offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
13233 - (TRAP_PER_CPU_RESUM_MONDO_PA !=
13234 - offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
13235 - (TRAP_PER_CPU_RESUM_KBUF_PA !=
13236 - offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
13237 - (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
13238 - offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
13239 - (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
13240 - offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
13241 - (TRAP_PER_CPU_FAULT_INFO !=
13242 - offsetof(struct trap_per_cpu, fault_info)) ||
13243 - (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
13244 - offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
13245 - (TRAP_PER_CPU_CPU_LIST_PA !=
13246 - offsetof(struct trap_per_cpu, cpu_list_pa)) ||
13247 - (TRAP_PER_CPU_TSB_HUGE !=
13248 - offsetof(struct trap_per_cpu, tsb_huge)) ||
13249 - (TRAP_PER_CPU_TSB_HUGE_TEMP !=
13250 - offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
13251 - (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
13252 - offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
13253 - (TRAP_PER_CPU_CPU_MONDO_QMASK !=
13254 - offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
13255 - (TRAP_PER_CPU_DEV_MONDO_QMASK !=
13256 - offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
13257 - (TRAP_PER_CPU_RESUM_QMASK !=
13258 - offsetof(struct trap_per_cpu, resum_qmask)) ||
13259 - (TRAP_PER_CPU_NONRESUM_QMASK !=
13260 - offsetof(struct trap_per_cpu, nonresum_qmask)))
13261 - trap_per_cpu_offsets_are_bolixed_dave();
13263 - if ((TSB_CONFIG_TSB !=
13264 - offsetof(struct tsb_config, tsb)) ||
13265 - (TSB_CONFIG_RSS_LIMIT !=
13266 - offsetof(struct tsb_config, tsb_rss_limit)) ||
13267 - (TSB_CONFIG_NENTRIES !=
13268 - offsetof(struct tsb_config, tsb_nentries)) ||
13269 - (TSB_CONFIG_REG_VAL !=
13270 - offsetof(struct tsb_config, tsb_reg_val)) ||
13271 - (TSB_CONFIG_MAP_VADDR !=
13272 - offsetof(struct tsb_config, tsb_map_vaddr)) ||
13273 - (TSB_CONFIG_MAP_PTE !=
13274 - offsetof(struct tsb_config, tsb_map_pte)))
13275 - tsb_config_offsets_are_bolixed_dave();
13277 + BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task));
13278 + BUILD_BUG_ON(TI_FLAGS != offsetof(struct thread_info, flags));
13279 + BUILD_BUG_ON(TI_CPU != offsetof(struct thread_info, cpu));
13280 + BUILD_BUG_ON(TI_FPSAVED != offsetof(struct thread_info, fpsaved));
13281 + BUILD_BUG_ON(TI_KSP != offsetof(struct thread_info, ksp));
13282 + BUILD_BUG_ON(TI_FAULT_ADDR !=
13283 + offsetof(struct thread_info, fault_address));
13284 + BUILD_BUG_ON(TI_KREGS != offsetof(struct thread_info, kregs));
13285 + BUILD_BUG_ON(TI_UTRAPS != offsetof(struct thread_info, utraps));
13286 + BUILD_BUG_ON(TI_EXEC_DOMAIN !=
13287 + offsetof(struct thread_info, exec_domain));
13288 + BUILD_BUG_ON(TI_REG_WINDOW !=
13289 + offsetof(struct thread_info, reg_window));
13290 + BUILD_BUG_ON(TI_RWIN_SPTRS !=
13291 + offsetof(struct thread_info, rwbuf_stkptrs));
13292 + BUILD_BUG_ON(TI_GSR != offsetof(struct thread_info, gsr));
13293 + BUILD_BUG_ON(TI_XFSR != offsetof(struct thread_info, xfsr));
13294 + BUILD_BUG_ON(TI_PRE_COUNT !=
13295 + offsetof(struct thread_info, preempt_count));
13296 + BUILD_BUG_ON(TI_NEW_CHILD !=
13297 + offsetof(struct thread_info, new_child));
13298 + BUILD_BUG_ON(TI_SYS_NOERROR !=
13299 + offsetof(struct thread_info, syscall_noerror));
13300 + BUILD_BUG_ON(TI_RESTART_BLOCK !=
13301 + offsetof(struct thread_info, restart_block));
13302 + BUILD_BUG_ON(TI_KUNA_REGS !=
13303 + offsetof(struct thread_info, kern_una_regs));
13304 + BUILD_BUG_ON(TI_KUNA_INSN !=
13305 + offsetof(struct thread_info, kern_una_insn));
13306 + BUILD_BUG_ON(TI_FPREGS != offsetof(struct thread_info, fpregs));
13307 + BUILD_BUG_ON((TI_FPREGS & (64 - 1)));
13309 + BUILD_BUG_ON(TRAP_PER_CPU_THREAD !=
13310 + offsetof(struct trap_per_cpu, thread));
13311 + BUILD_BUG_ON(TRAP_PER_CPU_PGD_PADDR !=
13312 + offsetof(struct trap_per_cpu, pgd_paddr));
13313 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_PA !=
13314 + offsetof(struct trap_per_cpu, cpu_mondo_pa));
13315 + BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_PA !=
13316 + offsetof(struct trap_per_cpu, dev_mondo_pa));
13317 + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_MONDO_PA !=
13318 + offsetof(struct trap_per_cpu, resum_mondo_pa));
13319 + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_KBUF_PA !=
13320 + offsetof(struct trap_per_cpu, resum_kernel_buf_pa));
13321 + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_MONDO_PA !=
13322 + offsetof(struct trap_per_cpu, nonresum_mondo_pa));
13323 + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_KBUF_PA !=
13324 + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa));
13325 + BUILD_BUG_ON(TRAP_PER_CPU_FAULT_INFO !=
13326 + offsetof(struct trap_per_cpu, fault_info));
13327 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
13328 + offsetof(struct trap_per_cpu, cpu_mondo_block_pa));
13329 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_LIST_PA !=
13330 + offsetof(struct trap_per_cpu, cpu_list_pa));
13331 + BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE !=
13332 + offsetof(struct trap_per_cpu, tsb_huge));
13333 + BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE_TEMP !=
13334 + offsetof(struct trap_per_cpu, tsb_huge_temp));
13336 + BUILD_BUG_ON(TRAP_PER_CPU_IRQ_WORKLIST !=
13337 + offsetof(struct trap_per_cpu, irq_worklist));
13339 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_QMASK !=
13340 + offsetof(struct trap_per_cpu, cpu_mondo_qmask));
13341 + BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_QMASK !=
13342 + offsetof(struct trap_per_cpu, dev_mondo_qmask));
13343 + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_QMASK !=
13344 + offsetof(struct trap_per_cpu, resum_qmask));
13345 + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_QMASK !=
13346 + offsetof(struct trap_per_cpu, nonresum_qmask));
13348 + BUILD_BUG_ON(TSB_CONFIG_TSB !=
13349 + offsetof(struct tsb_config, tsb));
13350 + BUILD_BUG_ON(TSB_CONFIG_RSS_LIMIT !=
13351 + offsetof(struct tsb_config, tsb_rss_limit));
13352 + BUILD_BUG_ON(TSB_CONFIG_NENTRIES !=
13353 + offsetof(struct tsb_config, tsb_nentries));
13354 + BUILD_BUG_ON(TSB_CONFIG_REG_VAL !=
13355 + offsetof(struct tsb_config, tsb_reg_val));
13356 + BUILD_BUG_ON(TSB_CONFIG_MAP_VADDR !=
13357 + offsetof(struct tsb_config, tsb_map_vaddr));
13358 + BUILD_BUG_ON(TSB_CONFIG_MAP_PTE !=
13359 + offsetof(struct tsb_config, tsb_map_pte));
13361 /* Attach to the address space of init_task. On SMP we
13362 * do this in smp.c:smp_callin for other cpus.
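Rough sketch (illustrative only, not part of the patch): the trap_init() hunk above turns the old boot-time offset checks, which merely called dummy *_bolixed_dave() functions, into BUILD_BUG_ON() assertions, so a mismatched asm-offset now breaks the compile instead of being reported at boot. Assuming the classic negative-array-size definition of BUILD_BUG_ON() used by kernels of this vintage, the idea is roughly:

	#include <linux/stddef.h>	/* offsetof() */

	/* illustrative stand-in for the real BUILD_BUG_ON() */
	#define SKETCH_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

	struct sketch_ti { unsigned long task; unsigned long flags; };

	static inline void sketch_offset_check(void)
	{
		/* compiles only while the constant matches the struct layout */
		SKETCH_BUILD_BUG_ON(0 != offsetof(struct sketch_ti, task));
	}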
13364 diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
13365 index 1ade3d6..2a31ffa 100644
13366 --- a/arch/sparc64/kernel/ttable.S
13367 +++ b/arch/sparc64/kernel/ttable.S
13368 @@ -66,7 +66,7 @@ tl0_irq6: BTRAP(0x46)
13369 tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49)
13370 tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d)
13371 tl0_irq14: TRAP_IRQ(timer_interrupt, 14)
13372 -tl0_irq15: TRAP_IRQ(handler_irq, 15)
13373 +tl0_irq15: TRAP_IRQ(perfctr_irq, 15)
13374 tl0_resv050: BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55)
13375 tl0_resv056: BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b)
13376 tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f)
13377 diff --git a/arch/sparc64/perfmon/Kconfig b/arch/sparc64/perfmon/Kconfig
13378 new file mode 100644
13379 index 0000000..4672024
13381 +++ b/arch/sparc64/perfmon/Kconfig
13383 +menu "Hardware Performance Monitoring support"
13385 + bool "Perfmon2 performance monitoring interface"
13388 + Enables the perfmon2 interface to access the hardware
13389 + performance counters. See <http://perfmon2.sf.net/> for
13392 +config PERFMON_DEBUG
13393 + bool "Perfmon debugging"
13394 + depends on PERFMON
13397 + Enables perfmon debugging support
13399 +config PERFMON_DEBUG_FS
13400 + bool "Enable perfmon statistics reporting via debugfs"
13402 + depends on PERFMON && DEBUG_FS
13404 + Enable collection and reporting of perfmon timing statistics under
13405 + debugfs. This is used for debugging and performance analysis of the
13406 + subsystem. The debugfs filesystem must be mounted.
13409 diff --git a/arch/sparc64/perfmon/Makefile b/arch/sparc64/perfmon/Makefile
13410 new file mode 100644
13411 index 0000000..ad2d907
13413 +++ b/arch/sparc64/perfmon/Makefile
13415 +obj-$(CONFIG_PERFMON) += perfmon.o
13416 diff --git a/arch/sparc64/perfmon/perfmon.c b/arch/sparc64/perfmon/perfmon.c
13417 new file mode 100644
13418 index 0000000..9e29833
13420 +++ b/arch/sparc64/perfmon/perfmon.c
13422 +/* perfmon.c: sparc64 perfmon support
13424 + * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
13427 +#include <linux/kernel.h>
13428 +#include <linux/module.h>
13429 +#include <linux/irq.h>
13430 +#include <linux/perfmon_kern.h>
13432 +#include <asm/system.h>
13433 +#include <asm/spitfire.h>
13434 +#include <asm/hypervisor.h>
13437 + void (*write)(u64);
13438 + u64 (*read)(void);
13441 +static void direct_write_pcr(u64 val)
13446 +static u64 direct_read_pcr(void)
13455 +static struct pcr_ops direct_pcr_ops = {
13456 + .write = direct_write_pcr,
13457 + .read = direct_read_pcr,
13460 +/* Using the hypervisor call is needed so that we can set the
13461 + * hypervisor trace bit correctly, which is hyperprivileged.
13463 +static void n2_write_pcr(u64 val)
13465 + unsigned long ret;
13467 + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
13468 + if (ret != HV_EOK)
13472 +static u64 n2_read_pcr(void)
13481 +static struct pcr_ops n2_pcr_ops = {
13482 + .write = n2_write_pcr,
13483 + .read = n2_read_pcr,
13486 +static struct pcr_ops *pcr_ops;
13488 +void pfm_arch_write_pmc(struct pfm_context *ctx,
13489 + unsigned int cnum, u64 value)
13492 + * we only write to the actual register when monitoring is
13493 + * active (pfm_start was issued)
13495 + if (ctx && ctx->flags.started == 0)
13498 + pcr_ops->write(value);
13501 +u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
13503 + return pcr_ops->read();
13507 + * collect pending overflowed PMDs. Called from pfm_ctxsw()
13508 + * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
13509 + * and set->npend_ovfls. Interrupts are masked
13511 +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
13513 + unsigned int max = ctx->regs.max_intr_pmd;
13514 + u64 wmask = 1ULL << pfm_pmu_conf->counter_width;
13515 + u64 *intr_pmds = ctx->regs.intr_pmds;
13516 + u64 *used_mask = set->used_pmds;
13517 + u64 mask[PFM_PMD_BV];
13520 + bitmap_and(cast_ulp(mask),
13521 + cast_ulp(intr_pmds),
13522 + cast_ulp(used_mask),
13526 + * check all PMDs that can generate interrupts
13527 + * (that includes counters)
13529 + for (i = 0; i < max; i++) {
13530 + if (test_bit(i, mask)) {
13531 + u64 new_val = pfm_arch_read_pmd(ctx, i);
13533 + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
13534 + i, (unsigned long long)new_val,
13535 + (new_val&wmask) ? 1 : 0);
13537 + if (new_val & wmask) {
13538 + __set_bit(i, set->povfl_pmds);
13539 + set->npend_ovfls++;
13545 +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
13546 + struct pfm_event_set *set)
13548 + unsigned int i, max = ctx->regs.max_pmc;
13551 + * clear enable bits, assume all pmcs are enable pmcs
13553 + for (i = 0; i < max; i++) {
13554 + if (test_bit(i, set->used_pmcs))
13555 + pfm_arch_write_pmc(ctx, i, 0);
13558 + if (set->npend_ovfls)
13561 + __pfm_get_ovfl_pmds(ctx, set);
13565 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
13566 + * Context is locked. Interrupts are masked. Monitoring is active.
13567 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
13569 + * for per-thread:
13570 + * must stop monitoring for the task
13573 + * non-zero : did not save PMDs (as part of stopping the PMU)
13574 + * 0 : saved PMDs (no need to save them in caller)
13576 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
13579 + * disable lazy restore of PMC registers.
13581 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
13583 + pfm_stop_active(task, ctx, ctx->active_set);
13589 + * Called from pfm_stop() and idle notifier
13591 + * Interrupts are masked. Context is locked. Set is the active set.
13593 + * For per-thread:
13594 + * task is not necessarily current. If not current task, then
13595 + * task is guaranteed stopped and off any cpu. Access to PMU
13596 + * is not guaranteed. Interrupts are masked. Context is locked.
13597 + * Set is the active set.
13599 + * For system-wide:
13600 + * task is current
13602 + * must disable active monitoring. ctx cannot be NULL
13604 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
13607 + * no need to go through stop_save()
13608 + * if we are already stopped
13610 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
13614 + * stop live registers and collect pending overflow
13616 + if (task == current)
13617 + pfm_stop_active(task, ctx, ctx->active_set);
13621 + * Enable active monitoring. Called from pfm_start() and
13622 + * pfm_arch_unmask_monitoring().
13624 + * Interrupts are masked. Context is locked. Set is the active set.
13626 + * For per-thread:
13627 + * Task is not necessarily current. If not current task, then task
13628 + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
13630 + * For system-wide:
13631 + * task is always current
13633 + * must enable active monitoring.
13635 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
13637 + struct pfm_event_set *set;
13638 + unsigned int max_pmc = ctx->regs.max_pmc;
13641 + if (task != current)
13644 + set = ctx->active_set;
13645 + for (i = 0; i < max_pmc; i++) {
13646 + if (test_bit(i, set->used_pmcs))
13647 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
13652 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
13653 + * pfm_context_load_sys(), pfm_ctxsw()
13654 + * context is locked. Interrupts are masked. set cannot be NULL.
13655 + * Access to the PMU is guaranteed.
13657 + * function must restore all PMD registers from set.
13659 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
13661 + unsigned int max_pmd = ctx->regs.max_pmd;
13662 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
13663 + u64 *impl_pmds = ctx->regs.pmds;
13667 + * must restore all pmds to avoid leaking
13668 + * information to user.
13670 + for (i = 0; i < max_pmd; i++) {
13673 + if (test_bit(i, impl_pmds) == 0)
13676 + val = set->pmds[i].value;
13679 + * set upper bits for counter to ensure
13680 + * overflow will trigger
13682 + val &= ovfl_mask;
13684 + pfm_arch_write_pmd(ctx, i, val);
13689 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
13690 + * pfm_context_load_sys(), pfm_ctxsw().
13691 + * Context is locked. Interrupts are masked. set cannot be NULL.
13692 + * Access to the PMU is guaranteed.
13694 + * function must restore all PMC registers from set, if needed.
13696 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
13698 + unsigned int max_pmc = ctx->regs.max_pmc;
13699 + u64 *impl_pmcs = ctx->regs.pmcs;
13702 + /* If we're masked or stopped we don't need to bother restoring
13705 + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
13709 + * restore all pmcs
13711 + for (i = 0; i < max_pmc; i++)
13712 + if (test_bit(i, impl_pmcs))
13713 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
13716 +char *pfm_arch_get_pmu_module_name(void)
13721 +void perfmon_interrupt(struct pt_regs *regs)
13723 + pfm_interrupt_handler(instruction_pointer(regs), regs);
13726 +static struct pfm_regmap_desc pfm_sparc64_pmc_desc[] = {
13727 + PMC_D(PFM_REG_I, "PCR", 0, 0, 0, 0),
13730 +static struct pfm_regmap_desc pfm_sparc64_pmd_desc[] = {
13731 + PMD_D(PFM_REG_C, "PIC0", 0),
13732 + PMD_D(PFM_REG_C, "PIC1", 0),
13735 +static int pfm_sparc64_probe(void)
13740 +static struct pfm_pmu_config pmu_sparc64_pmu_conf = {
13741 + .counter_width = 31,
13742 + .pmd_desc = pfm_sparc64_pmd_desc,
13743 + .num_pmd_entries = 2,
13744 + .pmc_desc = pfm_sparc64_pmc_desc,
13745 + .num_pmc_entries = 1,
13746 + .probe_pmu = pfm_sparc64_probe,
13747 + .flags = PFM_PMU_BUILTIN_FLAG,
13748 + .owner = THIS_MODULE,
13751 +static unsigned long perf_hsvc_group;
13752 +static unsigned long perf_hsvc_major;
13753 +static unsigned long perf_hsvc_minor;
13755 +static int __init register_perf_hsvc(void)
13757 + if (tlb_type == hypervisor) {
13758 + switch (sun4v_chip_type) {
13759 + case SUN4V_CHIP_NIAGARA1:
13760 + perf_hsvc_group = HV_GRP_N2_CPU;
13763 + case SUN4V_CHIP_NIAGARA2:
13764 + perf_hsvc_group = HV_GRP_N2_CPU;
13772 + perf_hsvc_major = 1;
13773 + perf_hsvc_minor = 0;
13774 + if (sun4v_hvapi_register(perf_hsvc_group,
13776 + &perf_hsvc_minor)) {
13777 + printk(KERN_ERR "perfmon: Could not register N2 hvapi.\n");
13784 +static void unregister_perf_hsvc(void)
13786 + if (tlb_type != hypervisor)
13788 + sun4v_hvapi_unregister(perf_hsvc_group);
13791 +static int __init pfm_sparc64_pmu_init(void)
13796 + err = register_perf_hsvc();
13800 + if (tlb_type == hypervisor &&
13801 + sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
13802 + pcr_ops = &n2_pcr_ops;
13804 + pcr_ops = &direct_pcr_ops;
13806 + if (!strcmp(sparc_pmu_type, "ultra12"))
13807 + mask = (0xf << 11) | (0xf << 4) | 0x7;
13808 + else if (!strcmp(sparc_pmu_type, "ultra3") ||
13809 + !strcmp(sparc_pmu_type, "ultra3i") ||
13810 + !strcmp(sparc_pmu_type, "ultra3+") ||
13811 + !strcmp(sparc_pmu_type, "ultra4+"))
13812 + mask = (0x3f << 11) | (0x3f << 4) | 0x7;
13813 + else if (!strcmp(sparc_pmu_type, "niagara2"))
13814 + mask = ((1UL << 63) | (1UL << 62) |
13815 + (1UL << 31) | (0xfUL << 27) | (0xffUL << 19) |
13816 + (1UL << 18) | (0xfUL << 14) | (0xff << 6) |
13817 + (0x3UL << 4) | 0x7UL);
13818 + else if (!strcmp(sparc_pmu_type, "niagara"))
13819 + mask = ((1UL << 9) | (1UL << 8) |
13820 + (0x7UL << 4) | 0x7UL);
13826 + pmu_sparc64_pmu_conf.pmu_name = sparc_pmu_type;
13827 + pfm_sparc64_pmc_desc[0].rsvd_msk = ~mask;
13829 + return pfm_pmu_register(&pmu_sparc64_pmu_conf);
13832 + unregister_perf_hsvc();
13836 +static void __exit pfm_sparc64_pmu_exit(void)
13838 + unregister_perf_hsvc();
13839 + return pfm_pmu_unregister(&pmu_sparc64_pmu_conf);
13842 +module_init(pfm_sparc64_pmu_init);
13843 +module_exit(pfm_sparc64_pmu_exit);
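As a sketch of the design in the sparc64 module above (illustrative only, not part of the patch): the pcr_ops structure hides the difference between direct %pcr access on older UltraSPARC chips and the hyperprivileged sun4v call needed on Niagara2, so pfm_arch_write_pmc()/pfm_arch_read_pmc() stay identical on both families. The real ops are the direct_pcr_ops and n2_pcr_ops defined above; the dispatch reduces to:

	#include <linux/types.h>	/* u64 */

	struct sketch_pcr_ops {
		void (*write)(u64 val);
		u64 (*read)(void);
	};

	/* chosen once in pfm_sparc64_pmu_init() based on the CPU type */
	static struct sketch_pcr_ops *sketch_ops;

	static inline void sketch_write_pcr(u64 val)
	{
		/* same call site whether the backend is wr %pcr or a sun4v call */
		sketch_ops->write(val);
	}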
13844 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
13845 index ed92864..3a2b544 100644
13846 --- a/arch/x86/Kconfig
13847 +++ b/arch/x86/Kconfig
13848 @@ -1378,6 +1378,8 @@ config COMPAT_VDSO
13852 +source "arch/x86/perfmon/Kconfig"
13856 config ARCH_ENABLE_MEMORY_HOTPLUG
13857 diff --git a/arch/x86/Makefile b/arch/x86/Makefile
13858 index f5631da..c868ad6 100644
13859 --- a/arch/x86/Makefile
13860 +++ b/arch/x86/Makefile
13861 @@ -150,6 +150,8 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
13862 core-y += arch/x86/kernel/
13863 core-y += arch/x86/mm/
13865 +core-$(CONFIG_PERFMON) += arch/x86/perfmon/
13867 # Remaining sub architecture files
13868 core-y += $(mcore-y)
13870 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
13871 index ffc1bb4..58e00cb 100644
13872 --- a/arch/x86/ia32/ia32entry.S
13873 +++ b/arch/x86/ia32/ia32entry.S
13874 @@ -832,4 +832,16 @@ ia32_sys_call_table:
13875 .quad sys_dup3 /* 330 */
13877 .quad sys_inotify_init1
13878 + .quad sys_pfm_create_context
13879 + .quad sys_pfm_write_pmcs
13880 + .quad sys_pfm_write_pmds /* 335 */
13881 + .quad sys_pfm_read_pmds
13882 + .quad sys_pfm_load_context
13883 + .quad sys_pfm_start
13884 + .quad sys_pfm_stop
13885 + .quad sys_pfm_restart /* 340 */
13886 + .quad sys_pfm_create_evtsets
13887 + .quad sys_pfm_getinfo_evtsets
13888 + .quad sys_pfm_delete_evtsets
13889 + .quad sys_pfm_unload_context
13891 diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
13892 index f88bd0d..53fe335 100644
13893 --- a/arch/x86/kernel/apic_32.c
13894 +++ b/arch/x86/kernel/apic_32.c
13896 #include <linux/acpi_pmtmr.h>
13897 #include <linux/module.h>
13898 #include <linux/dmi.h>
13899 +#include <linux/perfmon_kern.h>
13901 #include <asm/atomic.h>
13902 #include <asm/smp.h>
13903 @@ -669,6 +670,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
13904 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
13905 return APIC_EILVT_LVTOFF_IBS;
13907 +EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
13910 * Local APIC start and shutdown
13911 @@ -1367,6 +1369,9 @@ void __init apic_intr_init(void)
13912 #ifdef CONFIG_X86_MCE_P4THERMAL
13913 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
13915 +#ifdef CONFIG_PERFMON
13916 + set_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
13921 diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
13922 index 446c062..574cd3b 100644
13923 --- a/arch/x86/kernel/apic_64.c
13924 +++ b/arch/x86/kernel/apic_64.c
13925 @@ -228,6 +228,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
13926 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
13927 return APIC_EILVT_LVTOFF_IBS;
13929 +EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
13932 * Program the next event, relative to now
13933 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
13934 index 4e456bd..5b6d6ca 100644
13935 --- a/arch/x86/kernel/cpu/common.c
13936 +++ b/arch/x86/kernel/cpu/common.c
13938 #include <linux/module.h>
13939 #include <linux/percpu.h>
13940 #include <linux/bootmem.h>
13941 +#include <linux/perfmon_kern.h>
13942 #include <asm/processor.h>
13943 #include <asm/i387.h>
13944 #include <asm/msr.h>
13945 @@ -726,6 +727,8 @@ void __cpuinit cpu_init(void)
13946 current_thread_info()->status = 0;
13948 mxcsr_feature_mask_init();
13950 + pfm_init_percpu();
13953 #ifdef CONFIG_HOTPLUG_CPU
13954 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
13955 index 109792b..0b6e34c 100644
13956 --- a/arch/x86/kernel/entry_32.S
13957 +++ b/arch/x86/kernel/entry_32.S
13958 @@ -513,7 +513,7 @@ ENDPROC(system_call)
13960 RING0_PTREGS_FRAME # can't unwind into user space anyway
13962 - testb $_TIF_NEED_RESCHED, %cl
13963 + testw $(_TIF_NEED_RESCHED|_TIF_PERFMON_WORK), %cx
13967 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
13968 index 89434d4..34e44f5 100644
13969 --- a/arch/x86/kernel/entry_64.S
13970 +++ b/arch/x86/kernel/entry_64.S
13971 @@ -888,7 +888,13 @@ END(error_interrupt)
13972 ENTRY(spurious_interrupt)
13973 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
13974 END(spurious_interrupt)
13977 +#ifdef CONFIG_PERFMON
13978 +ENTRY(pmu_interrupt)
13979 + apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt
13980 +END(pmu_interrupt)
13984 * Exception entry points.
13986 diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
13987 index 1f26fd9..83f6bc1 100644
13988 --- a/arch/x86/kernel/irqinit_64.c
13989 +++ b/arch/x86/kernel/irqinit_64.c
13991 #include <linux/kernel_stat.h>
13992 #include <linux/sysdev.h>
13993 #include <linux/bitops.h>
13994 +#include <linux/perfmon_kern.h>
13996 #include <asm/acpi.h>
13997 #include <asm/atomic.h>
13998 @@ -217,6 +218,10 @@ void __init native_init_IRQ(void)
13999 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
14000 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
14002 +#ifdef CONFIG_PERFMON
14003 + alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
14007 setup_irq(2, &irq2);
14009 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
14010 index 31f40b2..ed27150 100644
14011 --- a/arch/x86/kernel/process_32.c
14012 +++ b/arch/x86/kernel/process_32.c
14014 #include <linux/personality.h>
14015 #include <linux/tick.h>
14016 #include <linux/percpu.h>
14017 +#include <linux/perfmon_kern.h>
14018 #include <linux/prctl.h>
14020 #include <asm/uaccess.h>
14021 @@ -277,6 +278,7 @@ void exit_thread(void)
14022 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
14025 + pfm_exit_thread();
14028 void flush_thread(void)
14029 @@ -334,6 +336,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
14031 savesegment(gs, p->thread.gs);
14033 + pfm_copy_thread(p);
14036 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
14037 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
14038 @@ -448,6 +452,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
14039 prev = &prev_p->thread;
14040 next = &next_p->thread;
14042 + if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
14043 + pfm_ctxsw_out(prev_p, next_p);
14045 debugctl = prev->debugctlmsr;
14046 if (next->ds_area_msr != prev->ds_area_msr) {
14047 /* we clear debugctl to make sure DS
14048 @@ -460,6 +467,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
14049 if (next->debugctlmsr != debugctl)
14050 update_debugctlmsr(next->debugctlmsr);
14052 + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
14053 + pfm_ctxsw_in(prev_p, next_p);
14055 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
14056 set_debugreg(next->debugreg0, 0);
14057 set_debugreg(next->debugreg1, 1);
14058 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
14059 index e12e0e4..97d49ce 100644
14060 --- a/arch/x86/kernel/process_64.c
14061 +++ b/arch/x86/kernel/process_64.c
14063 #include <linux/kprobes.h>
14064 #include <linux/kdebug.h>
14065 #include <linux/tick.h>
14066 +#include <linux/perfmon_kern.h>
14067 #include <linux/prctl.h>
14069 #include <asm/uaccess.h>
14070 @@ -240,6 +241,7 @@ void exit_thread(void)
14071 t->io_bitmap_max = 0;
14074 + pfm_exit_thread();
14077 void flush_thread(void)
14078 @@ -344,6 +346,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
14079 savesegment(es, p->thread.es);
14080 savesegment(ds, p->thread.ds);
14082 + pfm_copy_thread(p);
14084 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
14085 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
14086 if (!p->thread.io_bitmap_ptr) {
14087 @@ -472,6 +476,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
14088 prev = &prev_p->thread,
14089 next = &next_p->thread;
14091 + if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
14092 + pfm_ctxsw_out(prev_p, next_p);
14094 debugctl = prev->debugctlmsr;
14095 if (next->ds_area_msr != prev->ds_area_msr) {
14096 /* we clear debugctl to make sure DS
14097 @@ -484,6 +491,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
14098 if (next->debugctlmsr != debugctl)
14099 update_debugctlmsr(next->debugctlmsr);
14101 + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
14102 + pfm_ctxsw_in(prev_p, next_p);
14104 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
14105 loaddebug(next, 0);
14106 loaddebug(next, 1);
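A short sketch of what the process_32.c and process_64.c hunks above add (illustrative only, reusing the pfm_ctxsw_out()/pfm_ctxsw_in() calls introduced by this patch): the perfmon hooks in __switch_to_xtra() are gated on the TIF_PERFMON_CTXSW thread flag, so tasks that never created a perfmon context pay only for a flag test on context switch:

	static inline void sketch_perfmon_ctxsw(struct task_struct *prev_p,
						struct task_struct *next_p)
	{
		/* save/stop PMU state of the outgoing task, if it is monitored */
		if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
			pfm_ctxsw_out(prev_p, next_p);

		/* restore/arm PMU state for the incoming task, if it is monitored */
		if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
			pfm_ctxsw_in(prev_p, next_p);
	}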
14107 diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
14108 index 6fb5bcd..53e6665 100644
14109 --- a/arch/x86/kernel/signal_32.c
14110 +++ b/arch/x86/kernel/signal_32.c
14112 #include <linux/sched.h>
14113 #include <linux/wait.h>
14114 #include <linux/elf.h>
14115 +#include <linux/perfmon_kern.h>
14116 #include <linux/smp.h>
14117 #include <linux/mm.h>
14119 @@ -657,6 +658,10 @@ static void do_signal(struct pt_regs *regs)
14121 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
14123 + /* process perfmon asynchronous work (e.g. block thread or reset) */
14124 + if (thread_info_flags & _TIF_PERFMON_WORK)
14125 + pfm_handle_work(regs);
14127 /* deal with pending signal delivery */
14128 if (thread_info_flags & _TIF_SIGPENDING)
14130 diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
14131 index ca316b5..6e9fa74 100644
14132 --- a/arch/x86/kernel/signal_64.c
14133 +++ b/arch/x86/kernel/signal_64.c
14135 #include <linux/stddef.h>
14136 #include <linux/personality.h>
14137 #include <linux/compiler.h>
14138 +#include <linux/perfmon_kern.h>
14139 #include <asm/processor.h>
14140 #include <asm/ucontext.h>
14141 #include <asm/uaccess.h>
14142 @@ -549,12 +550,17 @@ static void do_signal(struct pt_regs *regs)
14143 void do_notify_resume(struct pt_regs *regs, void *unused,
14144 __u32 thread_info_flags)
14147 #ifdef CONFIG_X86_MCE
14148 /* notify userspace of pending MCEs */
14149 if (thread_info_flags & _TIF_MCE_NOTIFY)
14151 #endif /* CONFIG_X86_MCE */
14153 + /* process perfmon asynchronous work (e.g. block thread or reset) */
14154 + if (thread_info_flags & _TIF_PERFMON_WORK)
14155 + pfm_handle_work(regs);
14157 /* deal with pending signal delivery */
14158 if (thread_info_flags & _TIF_SIGPENDING)
14160 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
14161 index 7985c5b..9ddf6db 100644
14162 --- a/arch/x86/kernel/smpboot.c
14163 +++ b/arch/x86/kernel/smpboot.c
14165 #include <linux/init.h>
14166 #include <linux/smp.h>
14167 #include <linux/module.h>
14168 +#include <linux/perfmon_kern.h>
14169 #include <linux/sched.h>
14170 #include <linux/percpu.h>
14171 #include <linux/bootmem.h>
14172 @@ -1382,6 +1383,7 @@ int __cpu_disable(void)
14173 remove_cpu_from_maps(cpu);
14174 unlock_vector_lock();
14175 fixup_irqs(cpu_online_map);
14176 + pfm_cpu_disable();
14180 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
14181 index d44395f..e1384a9 100644
14182 --- a/arch/x86/kernel/syscall_table_32.S
14183 +++ b/arch/x86/kernel/syscall_table_32.S
14184 @@ -332,3 +332,15 @@ ENTRY(sys_call_table)
14185 .long sys_dup3 /* 330 */
14187 .long sys_inotify_init1
14188 + .long sys_pfm_create_context
14189 + .long sys_pfm_write_pmcs
14190 + .long sys_pfm_write_pmds /* 335 */
14191 + .long sys_pfm_read_pmds
14192 + .long sys_pfm_load_context
14193 + .long sys_pfm_start
14194 + .long sys_pfm_stop
14195 + .long sys_pfm_restart /* 340 */
14196 + .long sys_pfm_create_evtsets
14197 + .long sys_pfm_getinfo_evtsets
14198 + .long sys_pfm_delete_evtsets
14199 + .long sys_pfm_unload_context
14200 diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
14201 index 8a5f161..10faef5 100644
14202 --- a/arch/x86/oprofile/nmi_int.c
14203 +++ b/arch/x86/oprofile/nmi_int.c
14205 #include <linux/moduleparam.h>
14206 #include <linux/kdebug.h>
14207 #include <linux/cpu.h>
14208 +#include <linux/perfmon_kern.h>
14209 #include <asm/nmi.h>
14210 #include <asm/msr.h>
14211 #include <asm/apic.h>
14212 @@ -217,12 +218,18 @@ static int nmi_setup(void)
14216 - if (!allocate_msrs())
14217 + if (pfm_session_allcpus_acquire())
14220 + if (!allocate_msrs()) {
14221 + pfm_session_allcpus_release();
14225 err = register_die_notifier(&profile_exceptions_nb);
14228 + pfm_session_allcpus_release();
14232 @@ -304,6 +311,7 @@ static void nmi_shutdown(void)
14233 model->shutdown(msrs);
14235 put_cpu_var(cpu_msrs);
14236 + pfm_session_allcpus_release();
14239 static void nmi_cpu_start(void *dummy)
14240 diff --git a/arch/x86/perfmon/Kconfig b/arch/x86/perfmon/Kconfig
14241 new file mode 100644
14242 index 0000000..08842e6
14244 +++ b/arch/x86/perfmon/Kconfig
14246 +menu "Hardware Performance Monitoring support"
14248 + bool "Perfmon2 performance monitoring interface"
14249 + select X86_LOCAL_APIC
14252 + Enables the perfmon2 interface to access the hardware
14253 + performance counters. See <http://perfmon2.sf.net/> for
14256 +config PERFMON_DEBUG
14257 + bool "Perfmon debugging"
14259 + depends on PERFMON
14261 + Enables perfmon debugging support
14263 +config PERFMON_DEBUG_FS
14264 + bool "Enable perfmon statistics reporting via debugfs"
14266 + depends on PERFMON && DEBUG_FS
14268 + Enable collection and reporting of perfmon timing statistics under
14269 + debugfs. This is used for debugging and performance analysis of the
14270 + subsystem. The debugfs filesystem must be mounted.
14272 +config X86_PERFMON_P6
14273 + tristate "Support for Intel P6/Pentium M processor hardware performance counters"
14274 + depends on PERFMON && X86_32
14277 + Enables support for Intel P6-style hardware performance counters.
14278 + To be used with Intel Pentium III, Pentium Pro, and Pentium M processors.
14280 +config X86_PERFMON_P4
14281 + tristate "Support for Intel Pentium 4/Xeon hardware performance counters"
14282 + depends on PERFMON
14285 + Enables support for Intel Pentium 4/Xeon (Netburst) hardware performance
14288 +config X86_PERFMON_PEBS_P4
14289 + tristate "Support for Intel Netburst Precise Event-Based Sampling (PEBS)"
14290 + depends on PERFMON && X86_PERFMON_P4
14293 + Enables support for Precise Event-Based Sampling (PEBS) on the Intel
14294 + Netburst processors, such as the Pentium 4 and Xeon, which support it.
14296 +config X86_PERFMON_CORE
14297 + tristate "Support for Intel Core-based performance counters"
14298 + depends on PERFMON
14301 + Enables support for Intel Core-based performance counters. Enable
14302 + this option to support Intel Core 2 processors.
14304 +config X86_PERFMON_PEBS_CORE
14305 + tristate "Support for Intel Core Precise Event-Based Sampling (PEBS)"
14306 + depends on PERFMON && X86_PERFMON_CORE
14309 + Enables support for Precise Event-Based Sampling (PEBS) on the Intel
14312 +config X86_PERFMON_INTEL_ATOM
14313 + tristate "Support for Intel Atom processor"
14314 + depends on PERFMON
14317 + Enables support for Intel Atom processors.
14319 +config X86_PERFMON_INTEL_ARCH
14320 + tristate "Support for Intel architectural perfmon v1/v2"
14321 + depends on PERFMON
14324 + Enables support for Intel architectural performance counters.
14325 + This feature was introduced with Intel Core Solo/Core Duo processors.
14327 +config X86_PERFMON_AMD64
14328 + tristate "Support AMD Athlon64/Opteron64 hardware performance counters"
14329 + depends on PERFMON
14332 + Enables support for Athlon64/Opteron64 hardware performance counters.
14333 + Supports family 6, 15 and 16 (10H) processors.
14335 diff --git a/arch/x86/perfmon/Makefile b/arch/x86/perfmon/Makefile
14336 new file mode 100644
14337 index 0000000..1cbed3e
14339 +++ b/arch/x86/perfmon/Makefile
14342 +# Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
14343 +# Contributed by Stephane Eranian <eranian@hpl.hp.com>
14345 +obj-$(CONFIG_PERFMON) += perfmon.o
14346 +obj-$(CONFIG_X86_PERFMON_P6) += perfmon_p6.o
14347 +obj-$(CONFIG_X86_PERFMON_P4) += perfmon_p4.o
14348 +obj-$(CONFIG_X86_PERFMON_CORE) += perfmon_intel_core.o
14349 +obj-$(CONFIG_X86_PERFMON_INTEL_ARCH) += perfmon_intel_arch.o
14350 +obj-$(CONFIG_X86_PERFMON_PEBS_P4) += perfmon_pebs_p4_smpl.o
14351 +obj-$(CONFIG_X86_PERFMON_PEBS_CORE) += perfmon_pebs_core_smpl.o
14352 +obj-$(CONFIG_X86_PERFMON_AMD64) += perfmon_amd64.o
14353 +obj-$(CONFIG_X86_PERFMON_INTEL_ATOM) += perfmon_intel_atom.o
14354 diff --git a/arch/x86/perfmon/perfmon.c b/arch/x86/perfmon/perfmon.c
14355 new file mode 100644
14356 index 0000000..e727fed
14358 +++ b/arch/x86/perfmon/perfmon.c
14361 + * This file implements the X86 specific support for the perfmon2 interface
14363 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
14364 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
14366 + * Copyright (c) 2007 Advanced Micro Devices, Inc.
14367 + * Contributed by Robert Richter <robert.richter@amd.com>
14369 + * This program is free software; you can redistribute it and/or
14370 + * modify it under the terms of version 2 of the GNU General Public
14371 + * License as published by the Free Software Foundation.
14373 + * This program is distributed in the hope that it will be useful,
14374 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14375 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14376 + * General Public License for more details.
14378 + * You should have received a copy of the GNU General Public License
14379 + * along with this program; if not, write to the Free Software
14380 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
14383 +#include <linux/interrupt.h>
14384 +#include <linux/perfmon_kern.h>
14385 +#include <linux/kprobes.h>
14386 +#include <linux/kdebug.h>
14387 +#include <linux/nmi.h>
14389 +#include <asm/apic.h>
14391 +DEFINE_PER_CPU(unsigned long, real_iip);
14392 +DEFINE_PER_CPU(int, pfm_using_nmi);
14393 +DEFINE_PER_CPU(unsigned long, saved_lvtpc);
14396 + * pfm_arch_ctxswin_thread - thread context switch in
14397 + * @task: task switched in
14398 + * @ctx: context for the task
14400 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
14401 + * set cannot be NULL. Context is locked. Interrupts are masked.
14403 + * Caller has already restored all PMD and PMC registers, if
14404 + * necessary (i.e., lazy restore scheme).
14406 + * On x86, the common code only needs to unsecure RDPMC if necessary.
14408 + * Model-specific features, e.g., PEBS and IBS, are taken care of in the
14409 + * corresponding PMU description module
14411 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
14413 + struct pfm_arch_context *ctx_arch;
14415 + ctx_arch = pfm_ctx_arch(ctx);
14418 + * restore saved real iip
14420 + if (ctx->active_set->npend_ovfls)
14421 + __get_cpu_var(real_iip) = ctx_arch->saved_real_iip;
14424 + * enable RDPMC on this CPU
14426 + if (ctx_arch->flags.insecure)
14427 + set_in_cr4(X86_CR4_PCE);
14431 + * pfm_arch_ctxswout_thread - context switch out thread
14432 + * @task: task switched out
14433 + * @ctx: context switched out
14435 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
14436 + * Context is locked. Interrupts are masked. Monitoring may be active.
14437 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
14440 + * non-zero : did not save PMDs (as part of stopping the PMU)
14441 + * 0 : saved PMDs (no need to save them in caller)
14443 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
14445 + struct pfm_arch_context *ctx_arch;
14446 + struct pfm_arch_pmu_info *pmu_info;
14448 + ctx_arch = pfm_ctx_arch(ctx);
14449 + pmu_info = pfm_pmu_info();
14452 + * disable lazy restore of PMCS on ctxswin because
14453 + * we modify some of them.
14455 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
14457 + if (ctx->active_set->npend_ovfls)
14458 + ctx_arch->saved_real_iip = __get_cpu_var(real_iip);
14461 + * disable RDPMC on this CPU
14463 + if (ctx_arch->flags.insecure)
14464 + clear_in_cr4(X86_CR4_PCE);
14466 + if (ctx->state == PFM_CTX_MASKED)
14469 + return pmu_info->stop_save(ctx, ctx->active_set);
14473 + * pfm_arch_stop - deactivate monitoring
14474 + * @task: task to stop
14475 + * @ctx: context to stop
14477 + * Called from pfm_stop()
14478 + * Interrupts are masked. Context is locked. Set is the active set.
14480 + * For per-thread:
14481 + * task is not necessarily current. If not current task, then
14482 + * task is guaranteed stopped and off any cpu. Access to PMU
14483 + * is not guaranteed.
14485 + * For system-wide:
14486 + * task is current
14488 + * must disable active monitoring. ctx cannot be NULL
14490 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
14492 + struct pfm_arch_pmu_info *pmu_info;
14494 + pmu_info = pfm_pmu_info();
14497 + * no need to go through stop_save()
14498 + * if we are already stopped
14500 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
14503 + if (task != current)
14506 + pmu_info->stop_save(ctx, ctx->active_set);
14511 + * pfm_arch_start - activate monitoring
14512 + * @task: task to start
14513 + * @ctx: context to start
14515 + * Interrupts are masked. Context is locked.
14517 + * For per-thread:
14518 + * Task is not necessarily current. If not current task, then task
14519 + * is guaranteed stopped and off any cpu. No access to PMU if task
14520 + * is not current.
14522 + * For system-wide:
14523 + * task is always current
14525 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
14527 + struct pfm_event_set *set;
14529 + set = ctx->active_set;
14531 + if (task != current)
14535 + * cannot restore PMC if no access to PMU. Will be done
14536 + * when the thread is switched back in
14539 + pfm_arch_restore_pmcs(ctx, set);
14543 + * pfm_arch_restore_pmds - reload PMD registers
14544 + * @ctx: context to restore from
14545 + * @set: current event set
14547 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
14548 + * pfm_context_load_sys(), pfm_ctxsw()
14550 + * Context is locked. Interrupts are masked. Set cannot be NULL.
14551 + * Access to the PMU is guaranteed.
14553 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
14555 + struct pfm_arch_pmu_info *pmu_info;
14558 + pmu_info = pfm_pmu_info();
14560 + num = set->nused_pmds;
14563 + * model-specific override
14565 + if (pmu_info->restore_pmds) {
14566 + pmu_info->restore_pmds(ctx, set);
14571 + * we can restore only the PMD we use because:
14573 + * - can only read with pfm_read_pmds() the registers
14574 + * declared used via pfm_write_pmds(), smpl_pmds, reset_pmds
14576 + * - if cr4.pce=1, only counters are exposed to user. RDPMC
14577 + * does not work with other types of PMU registers. Thus, no
14578 + * address is ever exposed by counters
14580 + * - there is never a dependency between one pmd register and
14583 + for (i = 0; num; i++) {
14584 + if (likely(test_bit(i, cast_ulp(set->used_pmds)))) {
14585 + pfm_write_pmd(ctx, i, set->pmds[i].value);
14592 + * pfm_arch_restore_pmcs - reload PMC registers
14593 + * @ctx: context to restore from
14594 + * @set: current event set
14596 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
14597 + * pfm_context_load_sys(), pfm_ctxsw().
14599 + * Context is locked. Interrupts are masked. set cannot be NULL.
14600 + * Access to the PMU is guaranteed.
14602 + * function must restore all PMC registers from set
14604 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
14606 + struct pfm_arch_pmu_info *pmu_info;
14610 + pmu_info = pfm_pmu_info();
14613 + * we need to restore PMCs only when:
14614 + * - context is not masked
14615 + * - monitoring activated
14617 + * Masking monitoring after an overflow does not change the
14618 + * value of flags.started
14620 + if (ctx->state == PFM_CTX_MASKED || !ctx->flags.started)
14624 + * model-specific override
14626 + if (pmu_info->restore_pmcs) {
14627 + pmu_info->restore_pmcs(ctx, set);
14631 + * restore all pmcs
14633 + * It is not possible to restore only the pmcs we used because
14634 + * certain PMU models (e.g. Pentium 4) have dependencies. Thus
14635 + * we do not want one application using stale PMC coming from
14638 + * On PMU models where there are no dependencies between pmcs,
14639 + * it is possible to optimize by restoring only the registers that
14640 + * are used, and this can be done with the model-specific override
14641 + * for this function.
14643 + * The default code takes the safest approach, i.e., assumes the worst
14645 + mask = ctx->regs.pmcs;
14646 + num = ctx->regs.num_pmcs;
14647 + for (i = 0; num; i++) {
14648 + if (test_bit(i, cast_ulp(mask))) {
14649 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
14656 + * smp_pmu_interrupt - lowest level PMU interrupt handler for X86
14657 + * @regs: machine state
14659 + * The PMU interrupt is handled through an interrupt gate, therefore
14660 + * the CPU automatically clears the EFLAGS.IF, i.e., masking interrupts.
14662 + * The perfmon interrupt handler MUST run with interrupts disabled due
14663 + * to possible race with other, higher priority interrupts, such as timer
14664 + * or IPI function calls.
14666 + * See description in IA-32 architecture manual, Vol 3 section 5.8.1
14668 +void smp_pmu_interrupt(struct pt_regs *regs)
14670 + struct pfm_arch_pmu_info *pmu_info;
14671 + struct pfm_context *ctx;
14672 + unsigned long iip;
14675 + using_nmi = __get_cpu_var(pfm_using_nmi);
14682 + * when using NMI, pfm_handle_nmi() gets called
14683 + * first. It stops monitoring and records the
14684 + * iip into real_iip, then it reposts the interrupt
14685 + * using the lower priority vector LOCAL_PERFMON_VECTOR
14687 + * On some processors, e.g., P4, it may be that some
14688 + * state is already recorded from pfm_handle_nmi()
14689 + * and it only needs to be copied back into the normal
14690 + * fields so it can be used transparently by higher level
14694 + ctx = __get_cpu_var(pmu_ctx);
14695 + pmu_info = pfm_pmu_info();
14696 + iip = __get_cpu_var(real_iip);
14697 + if (ctx && pmu_info->nmi_copy_state)
14698 + pmu_info->nmi_copy_state(ctx);
14700 + iip = instruction_pointer(regs);
14702 + pfm_interrupt_handler(iip, regs);
14705 + * On Intel P6, Pentium M, P4, Intel Core:
14706 + * - it is necessary to clear the MASK field for the LVTPC
14707 + * vector. Otherwise interrupts remain masked. See
14710 + * - the documentation does not stipulate the behavior.
14711 + * To be safe, we also rewrite the vector to clear the
14714 + if (!using_nmi && current_cpu_data.x86_vendor == X86_VENDOR_INTEL)
14715 + apic_write(APIC_LVTPC, LOCAL_PERFMON_VECTOR);
14721 + * pfm_handle_nmi - PMU NMI handler notifier callback
14722 + * @nb: notifier block
14723 + * @val: type of die notifier
14724 + * @data: die notifier-specific data
14726 + * called from notify_die() notifier from a trap handler path. We only
14727 + * care about NMI related callbacks, and ignore everything else.
14729 + * Cannot grab any locks, include the perfmon context lock
14731 + * Must detect if NMI interrupt comes from perfmon, and if so it must
14732 + * stop the PMU and repost a lower-priority interrupt. The perfmon interrupt
14733 + * handler needs to grab the context lock, thus it cannot be run directly
14734 + * from the NMI interrupt call path.
14736 +static int __kprobes pfm_handle_nmi(struct notifier_block *nb,
14737 + unsigned long val,
14740 + struct die_args *args = data;
14741 + struct pfm_context *ctx;
14742 + struct pfm_arch_pmu_info *pmu_info;
14745 + * only NMI related calls
14747 + if (val != DIE_NMI_IPI)
14748 + return NOTIFY_DONE;
14751 + * perfmon not using NMI
14753 + if (!__get_cpu_var(pfm_using_nmi))
14754 + return NOTIFY_DONE;
14759 + ctx = __get_cpu_var(pmu_ctx);
14761 + PFM_DBG_ovfl("no ctx");
14762 + return NOTIFY_DONE;
14766 + * Detect if we have overflows, i.e., NMI interrupt
14769 + pmu_info = pfm_pmu_conf->pmu_info;
14770 + if (!pmu_info->has_ovfls(ctx)) {
14771 + PFM_DBG_ovfl("no ovfl");
14772 + return NOTIFY_DONE;
14776 + * we stop the PMU to avoid further overflow before this
14777 + * one is treated by lower priority interrupt handler
14779 + pmu_info->quiesce();
14782 + * record actual instruction pointer
14784 + __get_cpu_var(real_iip) = instruction_pointer(args->regs);
14787 + * post lower priority interrupt (LOCAL_PERFMON_VECTOR)
14789 + pfm_arch_resend_irq(ctx);
14791 + pfm_stats_inc(ovfl_intr_nmi_count);
14794 + * we need to rewrite the APIC vector on Intel
14796 + if (current_cpu_data.x86_vendor == X86_VENDOR_INTEL)
14797 + apic_write(APIC_LVTPC, APIC_DM_NMI);
14800 + * the notification was for us
14802 + return NOTIFY_STOP;
14805 +static struct notifier_block pfm_nmi_nb = {
14806 + .notifier_call = pfm_handle_nmi
14810 + * pfm_arch_get_pmu_module_name - get PMU description module name for autoload
14812 + * called from pfm_pmu_request_module
14814 +char *pfm_arch_get_pmu_module_name(void)
14816 + switch (current_cpu_data.x86) {
14818 + switch (current_cpu_data.x86_model) {
14819 + case 3: /* Pentium II */
14822 + return "perfmon_p6";
14823 + case 15: /* Merom */
14824 + case 23: /* Penryn */
14825 + return "perfmon_intel_core";
14826 + case 28: /* Atom/Silverthorne */
14827 + return "perfmon_intel_atom";
14828 + case 29: /* Dunnington */
14829 + return "perfmon_intel_core";
14835 + /* All Opteron processors */
14836 + if (current_cpu_data.x86_vendor == X86_VENDOR_AMD)
14837 + return "perfmon_amd64";
14839 + switch (current_cpu_data.x86_model) {
14841 + return "perfmon_p4";
14843 + /* FALL THROUGH */
14846 + if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
14847 + return "perfmon_intel_arch";
14854 + * pfm_arch_resend_irq - post perfmon interrupt on regular vector
14856 + * called from pfm_ctxswin_thread() and pfm_handle_nmi()
14858 +void pfm_arch_resend_irq(struct pfm_context *ctx)
14860 + unsigned long val, dest;
14862 + * we cannot use hw_resend_irq() because it goes to
14863 + * the I/O APIC. We need to go to the Local APIC.
14865 + * The "int vec" is not the right solution either
14866 + * because it triggers a software intr. We need
14867 + * to regenerate the interrupt and have it pended
14868 + * until we unmask interrupts.
14870 + * Instead we send ourselves an IPI on the perfmon
14873 + val = APIC_DEST_SELF|APIC_INT_ASSERT|
14874 + APIC_DM_FIXED|LOCAL_PERFMON_VECTOR;
14876 + dest = apic_read(APIC_ID);
14877 + apic_write(APIC_ICR2, dest);
14878 + apic_write(APIC_ICR, val);
14882 + * pfm_arch_pmu_acquire_percpu - setup APIC per CPU
14883 + * @data: contains pmu flags
14885 +static void pfm_arch_pmu_acquire_percpu(void *data)
14888 + struct pfm_arch_pmu_info *pmu_info;
14889 + unsigned int tmp, vec;
14890 + unsigned long flags = (unsigned long)data;
14891 + unsigned long lvtpc;
14893 + pmu_info = pfm_pmu_conf->pmu_info;
14896 + * we only reprogram the LVTPC vector if we have detected
14897 + * no sharing, otherwise it means the APIC is already programmed
14898 + * and we use whatever vector (likely NMI) is there
14900 + if (!(flags & PFM_X86_FL_SHARING)) {
14901 + if (flags & PFM_X86_FL_USE_NMI)
14902 + vec = APIC_DM_NMI;
14904 + vec = LOCAL_PERFMON_VECTOR;
14906 + tmp = apic_read(APIC_LVTERR);
14907 + apic_write(APIC_LVTERR, tmp | APIC_LVT_MASKED);
14908 + apic_write(APIC_LVTPC, vec);
14909 + apic_write(APIC_LVTERR, tmp);
14911 + lvtpc = (unsigned long)apic_read(APIC_LVTPC);
14913 + __get_cpu_var(pfm_using_nmi) = lvtpc == APIC_DM_NMI;
14915 + PFM_DBG("LTVPC=0x%lx using_nmi=%d", lvtpc, __get_cpu_var(pfm_using_nmi));
14918 + * invoke model specific acquire routine. May be used for
14919 + * model-specific initializations
14921 + if (pmu_info->acquire_pmu_percpu)
14922 + pmu_info->acquire_pmu_percpu();
14926 + * pfm_arch_pmu_acquire - acquire PMU resource from system
14927 + * @unavail_pmcs : bitmask to use to set unavailable pmcs
14928 + * @unavail_pmds : bitmask to use to set unavailable pmds
14930 + * interrupts are not masked
14932 + * Grab PMU registers from lower level MSR allocator
14934 + * Program the APIC according the possible interrupt vector
14935 + * either LOCAL_PERFMON_VECTOR or NMI
14937 +int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
14939 + struct pfm_arch_pmu_info *pmu_info;
14940 + struct pfm_regmap_desc *d;
14943 + pmu_info = pfm_pmu_conf->pmu_info;
14944 + pmu_info->flags &= ~PFM_X86_FL_SHARING;
14948 + d = pfm_pmu_conf->pmc_desc;
14949 + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
14950 + if (!(d->type & PFM_REG_I))
14953 + if (d->type & PFM_REG_V)
14956 + * reserve register with lower-level allocator
14958 + if (!reserve_evntsel_nmi(d->hw_addr)) {
14959 + PFM_DBG("pmc%d(%s) already used", i, d->desc);
14960 + __set_bit(i, cast_ulp(unavail_pmcs));
14965 + PFM_DBG("nlost=%d info_flags=0x%x\n", nlost, pmu_info->flags);
14967 + * some PMU models (e.g., P6) do not support sharing
14968 + * so check if we found less than the expected number of PMC registers
14971 + if (pmu_info->flags & PFM_X86_FL_NO_SHARING) {
14972 + PFM_INFO("PMU already used by another subsystem, "
14973 + "PMU does not support sharing, "
14974 + "try disabling Oprofile or "
14975 + "reboot with nmi_watchdog=0");
14978 + pmu_info->flags |= PFM_X86_FL_SHARING;
14981 + d = pfm_pmu_conf->pmd_desc;
14982 + for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) {
14983 + if (!(d->type & PFM_REG_I))
14986 + if (d->type & PFM_REG_V)
14989 + if (!reserve_perfctr_nmi(d->hw_addr)) {
14990 + PFM_DBG("pmd%d(%s) already used", i, d->desc);
14991 + __set_bit(i, cast_ulp(unavail_pmds));
14995 + * program APIC on each CPU
14997 + on_each_cpu(pfm_arch_pmu_acquire_percpu,
14998 + (void *)(unsigned long)pmu_info->flags, 1);
15003 + * must undo reservation of pmcs in case of error
15005 + d = pfm_pmu_conf->pmc_desc;
15006 + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
15007 + if (!(d->type & (PFM_REG_I|PFM_REG_V)))
15009 + if (!test_bit(i, cast_ulp(unavail_pmcs)))
15010 + release_evntsel_nmi(d->hw_addr);
15015 + * pfm_arch_pmu_release_percpu - clear NMI state for one CPU
15018 +static void pfm_arch_pmu_release_percpu(void *data)
15020 + struct pfm_arch_pmu_info *pmu_info;
15022 + pmu_info = pfm_pmu_conf->pmu_info;
15024 + __get_cpu_var(pfm_using_nmi) = 0;
15027 + * invoke model specific release routine.
15028 + * May be used to undo certain initializations
15029 + * or free some model-specific resources.
15031 + if (pmu_info->release_pmu_percpu)
15032 + pmu_info->release_pmu_percpu();
15036 + * pfm_arch_pmu_release - release PMU resource to system
15038 + * called from pfm_pmu_release()
15039 + * interrupts are not masked
15041 + * On x86, we return the PMU registers to the MSR allocator
15043 +void pfm_arch_pmu_release(void)
15045 + struct pfm_regmap_desc *d;
15048 + d = pfm_pmu_conf->pmc_desc;
15049 + n = pfm_pmu_conf->regs_all.num_pmcs;
15050 + for (i = 0; n; i++, d++) {
15051 + if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15053 + release_evntsel_nmi(d->hw_addr);
15055 + PFM_DBG("pmc%u released", i);
15057 + d = pfm_pmu_conf->pmd_desc;
15058 + n = pfm_pmu_conf->regs_all.num_pmds;
15059 + for (i = 0; n; i++, d++) {
15060 + if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmds)))
15062 + release_perfctr_nmi(d->hw_addr);
15064 + PFM_DBG("pmd%u released", i);
15067 + /* clear NMI variable if used */
15068 + if (__get_cpu_var(pfm_using_nmi))
15069 + on_each_cpu(pfm_arch_pmu_release_percpu, NULL, 1);
15073 + * pfm_arch_pmu_config_init - validate PMU description structure
15074 + * @cfg: PMU description structure
15078 + * errno otherwise
15080 + * called from pfm_pmu_register()
15082 +int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
15084 + struct pfm_arch_pmu_info *pmu_info;
15086 + pmu_info = pfm_pmu_info();
15088 + PFM_DBG("%s missing pmu_info", cfg->pmu_name);
15091 + if (!pmu_info->has_ovfls) {
15092 + PFM_DBG("%s missing has_ovfls callback", cfg->pmu_name);
15095 + if (!pmu_info->quiesce) {
15096 + PFM_DBG("%s missing quiesce callback", cfg->pmu_name);
15099 + if (!pmu_info->stop_save) {
15100 + PFM_DBG("%s missing stop_save callback", cfg->pmu_name);
15107 + * pfm_arch_init - one time global arch-specific initialization
15109 + * called from pfm_init()
15111 +int __init pfm_arch_init(void)
15114 + * we need to register our NMI handler when the kernel boots
15115 + * to avoid a deadlock condition with the NMI watchdog or Oprofile
15116 + * if we were to try and register/unregister on-demand.
15118 + register_die_notifier(&pfm_nmi_nb);
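Taken together, pfm_handle_nmi(), pfm_arch_resend_irq() and smp_pmu_interrupt() above form a two-stage interrupt path when the PMU uses NMIs. The outline below only restates that flow using the functions and per-CPU variables defined in this file; it is a sketch, not standalone code:

	/* NMI stage (pfm_handle_nmi): cannot take the context lock */
	if (!pmu_info->has_ovfls(ctx))		/* NMI was not ours */
		return NOTIFY_DONE;
	pmu_info->quiesce();			/* stop the PMU: no further overflows */
	__get_cpu_var(real_iip) = instruction_pointer(args->regs);
	pfm_arch_resend_irq(ctx);		/* self-IPI on LOCAL_PERFMON_VECTOR */

	/* IRQ stage (smp_pmu_interrupt): interrupts masked, locks allowed;
	 * it picks up the saved real_iip and calls pfm_interrupt_handler(). */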
15121 diff --git a/arch/x86/perfmon/perfmon_amd64.c b/arch/x86/perfmon/perfmon_amd64.c
15122 new file mode 100644
15123 index 0000000..f9b5f9c
15125 +++ b/arch/x86/perfmon/perfmon_amd64.c
15128 + * This file contains the PMU description for the Athlon64 and Opteron64
15129 + * processors. It supports 32 and 64-bit modes.
15131 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
15132 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
15134 + * Copyright (c) 2007 Advanced Micro Devices, Inc.
15135 + * Contributed by Robert Richter <robert.richter@amd.com>
15137 + * This program is free software; you can redistribute it and/or
15138 + * modify it under the terms of version 2 of the GNU General Public
15139 + * License as published by the Free Software Foundation.
15141 + * This program is distributed in the hope that it will be useful,
15142 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15143 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15144 + * General Public License for more details.
15146 + * You should have received a copy of the GNU General Public License
15147 + * along with this program; if not, write to the Free Software
15148 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
15151 +#include <linux/module.h>
15152 +#include <linux/vmalloc.h>
15153 +#include <linux/topology.h>
15154 +#include <linux/kprobes.h>
15155 +#include <linux/pci.h>
15156 +#include <linux/perfmon_kern.h>
15157 +#include <asm/hw_irq.h>
15158 +#include <asm/apic.h>
15160 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
15161 +MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
15162 +MODULE_DESCRIPTION("AMD64 PMU description table");
15163 +MODULE_LICENSE("GPL");
15165 +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203
15167 +static int force_nmi;
15168 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
15169 +module_param(force_nmi, bool, 0600);
15171 +#define HAS_IBS 0x01 /* has IBS support */
15173 +static u8 ibs_eilvt_off, ibs_status; /* AMD: extended interrupt LVT offset */
15175 +static void pfm_amd64_restore_pmcs(struct pfm_context *ctx,
15176 + struct pfm_event_set *set);
15177 +static void __kprobes pfm_amd64_quiesce(void);
15178 +static int pfm_amd64_has_ovfls(struct pfm_context *ctx);
15179 +static int pfm_amd64_stop_save(struct pfm_context *ctx,
15180 + struct pfm_event_set *set);
15182 +#define IBSFETCHCTL_PMC 4 /* pmc4 */
15183 +#define IBSFETCHCTL_PMD 4 /* pmd4 */
15184 +#define IBSOPSCTL_PMC 5 /* pmc5 */
15185 +#define IBSOPSCTL_PMD 7 /* pmd7 */
15187 +static u64 enable_mask[PFM_MAX_PMCS];
15188 +static u16 max_enable;
15190 +static struct pfm_arch_pmu_info pfm_amd64_pmu_info = {
15191 + .stop_save = pfm_amd64_stop_save,
15192 + .has_ovfls = pfm_amd64_has_ovfls,
15193 + .quiesce = pfm_amd64_quiesce,
15194 + .restore_pmcs = pfm_amd64_restore_pmcs
15197 +#define PFM_AMD64_IBSFETCHVAL (1ULL<<49) /* valid fetch sample */
15198 +#define PFM_AMD64_IBSFETCHEN (1ULL<<48) /* fetch sampling enabled */
15199 +#define PFM_AMD64_IBSOPVAL (1ULL<<18) /* valid execution sample */
15200 +#define PFM_AMD64_IBSOPEN (1ULL<<17) /* execution sampling enabled */
15203 + * force Local APIC interrupt on overflow
15205 +#define PFM_K8_VAL (1ULL<<20)
15206 +#define PFM_K8_NO64 (1ULL<<20)
15209 + * reserved bits must be 1
15212 + * - upper 32 bits are reserved
15213 + * - bit 20, bit 21
15216 + * - bits 36-39 are reserved
15217 + * - bits 42-63 are reserved
15218 + * - bit 20, bit 21
15220 + * for IBS registers:
15221 + * IBSFETCHCTL: all bits are reserved except bits 57, 48, 15:0
15222 + * IBSOPSCTL : all bits are reserved except bits 17, 15:0
15224 +#define PFM_K8_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21))
15225 +#define PFM_16_RSVD ((0x3fffffULL<<42) | (0xfULL<<36) | (1ULL<<20) | (1ULL<<21))
15226 +#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<48)|(1ULL<<57)|0xffffULL))
15227 +#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<17)|0xffffULL))
15229 +static struct pfm_regmap_desc pfm_amd64_pmc_desc[] = {
15230 +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
15231 +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
15232 +/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
15233 +/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
15234 +/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", 0, PFM_AMD64_IBSFETCHCTL_RSVD, 0, MSR_AMD64_IBSFETCHCTL),
15235 +/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", 0, PFM_AMD64_IBSOPCTL_RSVD, 0, MSR_AMD64_IBSOPCTL),
15237 +#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
15239 +#define PFM_REG_IBS (PFM_REG_I|PFM_REG_INTR)
15242 + * AMD64 counters are 48 bits, upper bits are reserved
15244 +#define PFM_AMD64_CTR_RSVD (~((1ULL<<48)-1))
15246 +#define PFM_AMD_D(n) \
15247 + { .type = PFM_REG_C, \
15248 + .desc = "PERFCTR"#n, \
15249 + .hw_addr = MSR_K7_PERFCTR0+n, \
15250 + .rsvd_msk = PFM_AMD64_CTR_RSVD, \
15251 + .dep_pmcs[0] = 1ULL << n \
15254 +#define PFM_AMD_IBSO(t, s, a) \
15259 + .dep_pmcs[0] = 1ULL << 5 \
15262 +#define PFM_AMD_IBSF(t, s, a) \
15267 + .dep_pmcs[0] = 1ULL << 6 \
15270 +static struct pfm_regmap_desc pfm_amd64_pmd_desc[] = {
15271 +/* pmd0 */ PFM_AMD_D(0),
15272 +/* pmd1 */ PFM_AMD_D(1),
15273 +/* pmd2 */ PFM_AMD_D(2),
15274 +/* pmd3 */ PFM_AMD_D(3),
15275 +/* pmd4 */ PFM_AMD_IBSF(PFM_REG_IBS, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
15276 +/* pmd5 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
15277 +/* pmd6 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
15278 +/* pmd7 */ PFM_AMD_IBSO(PFM_REG_IBS, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
15279 +/* pmd8 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
15280 +/* pmd9 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
15281 +/* pmd10 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
15282 +/* pmd11 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
15283 +/* pmd12 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
15284 +/* pmd13 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
15286 +#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
15288 +static struct pfm_context **pfm_nb_sys_owners;
15289 +static struct pfm_context *pfm_nb_task_owner;
15291 +static struct pfm_pmu_config pfm_amd64_pmu_conf;
15293 +#define is_ibs_pmc(x) (x == 4 || x == 5)
15295 +static void pfm_amd64_setup_eilvt_per_cpu(void *info)
15299 + /* program the IBS vector to the perfmon vector */
15300 + lvt_off = setup_APIC_eilvt_ibs(LOCAL_PERFMON_VECTOR,
15301 + APIC_EILVT_MSG_FIX, 0);
15302 + PFM_DBG("APIC_EILVT%d set to 0x%x", lvt_off, LOCAL_PERFMON_VECTOR);
15303 + ibs_eilvt_off = lvt_off;
15306 +static int pfm_amd64_setup_eilvt(void)
15308 +#define IBSCTL_LVTOFFSETVAL (1 << 8)
15309 +#define IBSCTL 0x1cc
15310 + struct pci_dev *cpu_cfg;
15314 + /* per CPU setup */
15315 + on_each_cpu(pfm_amd64_setup_eilvt_per_cpu, NULL, 1);
15320 + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
15321 + PCI_DEVICE_ID_AMD_10H_NB_MISC,
15326 + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
15327 + | IBSCTL_LVTOFFSETVAL);
15328 + pci_read_config_dword(cpu_cfg, IBSCTL, &value);
15329 + if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
15330 + PFM_DBG("Failed to setup IBS LVT offset, "
15331 + "IBSCTL = 0x%08x", value);
15337 + PFM_DBG("No CPU node configured for IBS");
15341 +#ifdef CONFIG_NUMA
15342 + /* Sanity check */
15343 + /* Works only for 64bit with proper numa implementation. */
15344 + if (nodes != num_possible_nodes()) {
15345 + PFM_DBG("Failed to setup CPU node(s) for IBS, "
15346 + "found: %d, expected %d",
15347 + nodes, num_possible_nodes());
15355 + * There can only be one user per socket for the Northbridge (NB) events,
15356 + * so we enforce mutual exclusion as follows:
15357 + * - per-thread : only one context machine-wide can use NB events
15358 + * - system-wide: only one context per processor socket
15360 + * Exclusion is enforced at:
15361 + * - pfm_load_context()
15362 + * - pfm_write_pmcs() for attached contexts
15364 + * Exclusion is released at:
15365 + * - pfm_unload_context() or any call that implicitly uses it
15368 + * 0 : successfully acquire NB access
15369 + * < 0: errno, failed to acquire NB access
15371 +static int pfm_amd64_acquire_nb(struct pfm_context *ctx)
15373 + struct pfm_context **entry, *old;
15377 + proc_id = cpu_data(smp_processor_id()).phys_proc_id;
15382 + if (ctx->flags.system)
15383 + entry = &pfm_nb_sys_owners[proc_id];
15385 + entry = &pfm_nb_task_owner;
15387 + old = cmpxchg(entry, NULL, ctx);
15389 + if (ctx->flags.system)
15390 + PFM_DBG("acquired Northbridge event access on socket %u", proc_id);
15392 + PFM_DBG("acquired Northbridge event access globally");
15393 + } else if (old != ctx) {
15394 + if (ctx->flags.system)
15395 + PFM_DBG("NorthBridge event conflict on socket %u", proc_id);
15397 + PFM_DBG("global NorthBridge event conflict");
15404 + * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL, i.e.,
15405 + * when we have detected a multi-core processor.
15407 + * context is locked, interrupts are masked
15409 +static int pfm_amd64_pmc_write_check(struct pfm_context *ctx,
15410 + struct pfm_event_set *set,
15411 + struct pfarg_pmc *req)
15413 + unsigned int event;
15416 + * delay checking NB event until we load the context
15418 + if (ctx->state == PFM_CTX_UNLOADED)
15422 + * check whether the event is an NB event
15424 + event = (unsigned int)(req->reg_value & 0xff);
15425 + if (event < 0xee)
15428 + return pfm_amd64_acquire_nb(ctx);
15432 + * invoked on pfm_load_context().
15433 + * context is locked, interrupts are masked
15435 +static int pfm_amd64_load_context(struct pfm_context *ctx)
15437 + struct pfm_event_set *set;
15438 + unsigned int i, n;
15441 + * scan all sets for NB events
15443 + list_for_each_entry(set, &ctx->set_list, list) {
15444 + n = set->nused_pmcs;
15445 + for (i = 0; n; i++) {
15446 + if (!test_bit(i, cast_ulp(set->used_pmcs)))
15449 + if (!is_ibs_pmc(i) && (set->pmcs[i] & 0xff) >= 0xee)
15456 + return pfm_amd64_acquire_nb(ctx);
15460 + * invoked on pfm_unload_context()
15462 +static void pfm_amd64_unload_context(struct pfm_context *ctx)
15464 + struct pfm_context **entry, *old;
15468 + proc_id = cpu_data(smp_processor_id()).phys_proc_id;
15474 + * unload always happens on the monitored CPU in system-wide
15476 + if (ctx->flags.system)
15477 + entry = &pfm_nb_sys_owners[proc_id];
15479 + entry = &pfm_nb_task_owner;
15481 + old = cmpxchg(entry, ctx, NULL);
15482 + if (old == ctx) {
15483 + if (ctx->flags.system)
15484 + PFM_DBG("released NorthBridge on socket %u", proc_id);
15486 + PFM_DBG("released NorthBridge events globally");
15491 + * detect if we need to activate NorthBridge event access control
15493 +static int pfm_amd64_setup_nb_event_control(void)
15495 + unsigned int c, n = 0;
15496 + unsigned int max_phys = 0;
15499 + for_each_possible_cpu(c) {
15500 + if (cpu_data(c).phys_proc_id > max_phys)
15501 + max_phys = cpu_data(c).phys_proc_id;
15506 + if (max_phys > 255) {
15507 + PFM_INFO("socket id %d is too big to handle", max_phys);
15511 + n = max_phys + 1;
15515 + pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners));
15516 + if (!pfm_nb_sys_owners)
15519 + memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners));
15520 + pfm_nb_task_owner = NULL;
15523 + * activate write-checker for PMC registers
15525 + for (c = 0; c < PFM_AMD_NUM_PMCS; c++) {
15526 + if (!is_ibs_pmc(c))
15527 + pfm_amd64_pmc_desc[c].type |= PFM_REG_WC;
15530 + pfm_amd64_pmu_info.load_context = pfm_amd64_load_context;
15531 + pfm_amd64_pmu_info.unload_context = pfm_amd64_unload_context;
15533 + pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check;
15535 + PFM_INFO("NorthBridge event access control enabled");
15541 + * disable registers which are not available on
15542 + * the host (applies to IBS registers)
15544 +static void pfm_amd64_check_registers(void)
15548 + PFM_DBG("has_ibs=%d", !!(ibs_status & HAS_IBS));
15550 + __set_bit(0, cast_ulp(enable_mask));
15551 + __set_bit(1, cast_ulp(enable_mask));
15552 + __set_bit(2, cast_ulp(enable_mask));
15553 + __set_bit(3, cast_ulp(enable_mask));
15554 + max_enable = 3+1;
15558 + * remove IBS registers if feature not present
15560 + if (!(ibs_status & HAS_IBS)) {
15561 + pfm_amd64_pmc_desc[4].type = PFM_REG_NA;
15562 + pfm_amd64_pmc_desc[5].type = PFM_REG_NA;
15563 + for (i = 4; i < 14; i++)
15564 + pfm_amd64_pmd_desc[i].type = PFM_REG_NA;
15566 + __set_bit(16, cast_ulp(enable_mask));
15567 + __set_bit(17, cast_ulp(enable_mask));
15568 + max_enable = 17 + 1;
15572 + * adjust reserved bit fields for family 16 (10h)
15574 + if (current_cpu_data.x86 == 16) {
15575 + for (i = 0; i < PFM_AMD_NUM_PMCS; i++)
15576 + if (pfm_amd64_pmc_desc[i].rsvd_msk == PFM_K8_RSVD)
15577 + pfm_amd64_pmc_desc[i].rsvd_msk = PFM_16_RSVD;
15581 +static int pfm_amd64_probe_pmu(void)
15584 + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
15585 + PFM_INFO("not an AMD processor");
15589 + switch (current_cpu_data.x86) {
15595 + PFM_INFO("unsupported family=%d", current_cpu_data.x86);
15599 + /* check for IBS */
15600 + if (cpu_has(&current_cpu_data, X86_FEATURE_IBS)) {
15601 + ibs_status |= HAS_IBS;
15602 + rdmsrl(MSR_AMD64_IBSCTL, val);
15605 + PFM_INFO("found family=%d IBSCTL=0x%llx", current_cpu_data.x86, (unsigned long long)val);
15608 + * check for local APIC (required)
15610 + if (!cpu_has_apic) {
15611 + PFM_INFO("no local APIC, unsupported");
15615 + if (current_cpu_data.x86_max_cores > 1
15616 + && pfm_amd64_setup_nb_event_control())
15620 + pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
15622 + if (ibs_status & HAS_IBS) {
15623 + /* Setup extended interrupt */
15624 + if (pfm_amd64_setup_eilvt()) {
15625 + PFM_INFO("Failed to initialize extended interrupts "
15627 + ibs_status &= ~HAS_IBS;
15628 + PFM_INFO("Unable to use IBS");
15630 + PFM_INFO("IBS supported");
15634 + pfm_amd64_check_registers();
15640 + * detect if counters have overflowed.
15642 + * 0 : no overflow
15643 + * 1 : at least one overflow
15645 +static int __kprobes pfm_amd64_has_ovfls(struct pfm_context *ctx)
15647 + struct pfm_regmap_desc *xrd;
15653 + * Check for IBS events
15655 + if (ibs_status & HAS_IBS) {
15656 + rdmsrl(MSR_AMD64_IBSFETCHCTL, val);
15657 + if (val & PFM_AMD64_IBSFETCHVAL)
15659 + rdmsrl(MSR_AMD64_IBSOPCTL, val);
15660 + if (val & PFM_AMD64_IBSOPVAL)
15664 + * Check regular counters
15666 + cnt_mask = ctx->regs.cnt_pmds;
15667 + num = ctx->regs.num_counters;
15668 + wmask = 1ULL << pfm_pmu_conf->counter_width;
15669 + xrd = pfm_amd64_pmd_desc;
15671 + for (i = 0; num; i++) {
15672 + if (test_bit(i, cast_ulp(cnt_mask))) {
15673 + rdmsrl(xrd[i].hw_addr, val);
15674 + if (!(val & wmask))
15683 + * Must check for IBS events BEFORE stopping monitoring because
15684 + * stopping monitoring destroys the IBS state information
15685 + * in IBSFETCHCTL/IBSOPCTL, as they are tagged as enable registers.
15688 +static int pfm_amd64_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
15690 + struct pfm_arch_pmu_info *pmu_info;
15691 + u64 used_mask[PFM_PMC_BV];
15693 + u64 val, wmask, ovfl_mask;
15694 + u32 i, count, use_ibs;
15696 + pmu_info = pfm_pmu_info();
15700 + * - on family 10h processor with IBS
15701 + * - at least one of the IBS PMD registers is used
15703 + use_ibs = (ibs_status & HAS_IBS)
15704 + && (test_bit(IBSFETCHCTL_PMD, cast_ulp(set->used_pmds))
15705 + || test_bit(IBSOPSCTL_PMD, cast_ulp(set->used_pmds)));
15707 + wmask = 1ULL << pfm_pmu_conf->counter_width;
15709 + bitmap_and(cast_ulp(used_mask),
15710 + cast_ulp(set->used_pmcs),
15711 + cast_ulp(enable_mask),
15714 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
15717 + * stop monitoring
15718 + * Unfortunately, this is very expensive!
15719 + * wrmsrl() is serializing.
15721 + * With IBS, we need to do read-modify-write to preserve the content
15722 + * for OpsCTL and FetchCTL because they are also used as PMDs and saved
15726 + for (i = 0; count; i++) {
15727 + if (test_bit(i, cast_ulp(used_mask))) {
15728 + if (i == IBSFETCHCTL_PMC) {
15729 + rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
15730 + val &= ~PFM_AMD64_IBSFETCHEN;
15731 + } else if (i == IBSOPSCTL_PMC) {
15732 + rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
15733 + val &= ~PFM_AMD64_IBSOPEN;
15736 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
15741 + for (i = 0; count; i++) {
15742 + if (test_bit(i, cast_ulp(used_mask))) {
15743 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
15750 + * if we already have a pending overflow condition, we simply
15751 + * return to take care of it first.
15753 + if (set->npend_ovfls)
15756 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
15757 + cnt_pmds = ctx->regs.cnt_pmds;
15760 + * check for pending overflows and save PMDs (combo)
15761 + * we employ used_pmds because we also need to save
15762 + * and not just check for pending interrupts.
15764 + * Must check for counting PMDs because of virtual PMDs and IBS
15766 + count = set->nused_pmds;
15767 + for (i = 0; count; i++) {
15768 + if (test_bit(i, cast_ulp(set->used_pmds))) {
15769 + val = pfm_arch_read_pmd(ctx, i);
15770 + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
15771 + if (!(val & wmask)) {
15772 + __set_bit(i, cast_ulp(set->povfl_pmds));
15773 + set->npend_ovfls++;
15775 + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask);
15777 + set->pmds[i].value = val;
15783 + * check if IBS contains valid data, and mark the corresponding
15784 + * PMD as overflowed
15787 + if (set->pmds[IBSFETCHCTL_PMD].value & PFM_AMD64_IBSFETCHVAL) {
15788 + __set_bit(IBSFETCHCTL_PMD, cast_ulp(set->povfl_pmds));
15789 + set->npend_ovfls++;
15791 + if (set->pmds[IBSOPSCTL_PMD].value & PFM_AMD64_IBSOPVAL) {
15792 + __set_bit(IBSOPSCTL_PMD, cast_ulp(set->povfl_pmds));
15793 + set->npend_ovfls++;
15796 + /* 0 means: no need to save PMDs at upper level */
15801 + * pfm_amd64_quiesce - stop monitoring without grabbing any lock
15803 + * called from NMI interrupt handler to immediately stop monitoring
15804 + * cannot grab any lock, including perfmon related locks
15806 +static void __kprobes pfm_amd64_quiesce(void)
15809 + * quiesce PMU by clearing available registers that have
15810 + * the start/stop capability
15812 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15813 + wrmsrl(MSR_K7_EVNTSEL0, 0);
15814 + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15815 + wrmsrl(MSR_K7_EVNTSEL0+1, 0);
15816 + if (test_bit(2, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15817 + wrmsrl(MSR_K7_EVNTSEL0+2, 0);
15818 + if (test_bit(3, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15819 + wrmsrl(MSR_K7_EVNTSEL0+3, 0);
15821 + if (test_bit(4, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15822 + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
15823 + if (test_bit(5, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15824 + wrmsrl(MSR_AMD64_IBSOPCTL, 0);
15828 + * pfm_amd64_restore_pmcs - reload PMC registers
15829 + * @ctx: context to restore from
15830 + * @set: current event set
15832 + * optimized version of pfm_arch_restore_pmcs(). On AMD64, we can
15833 + * afford to restore only the PMC registers we use, because they are
15834 + * all independent of each other.
15836 +static void pfm_amd64_restore_pmcs(struct pfm_context *ctx,
15837 + struct pfm_event_set *set)
15842 + mask = set->used_pmcs;
15843 + num = set->nused_pmcs;
15844 + for (i = 0; num; i++) {
15845 + if (test_bit(i, cast_ulp(mask))) {
15846 + wrmsrl(pfm_amd64_pmc_desc[i].hw_addr, set->pmcs[i]);
15852 +static struct pfm_pmu_config pfm_amd64_pmu_conf = {
15853 + .pmu_name = "AMD64",
15854 + .counter_width = 47,
15855 + .pmd_desc = pfm_amd64_pmd_desc,
15856 + .pmc_desc = pfm_amd64_pmc_desc,
15857 + .num_pmc_entries = PFM_AMD_NUM_PMCS,
15858 + .num_pmd_entries = PFM_AMD_NUM_PMDS,
15859 + .probe_pmu = pfm_amd64_probe_pmu,
15860 + .version = "1.2",
15861 + .pmu_info = &pfm_amd64_pmu_info,
15862 + .flags = PFM_PMU_BUILTIN_FLAG,
15863 + .owner = THIS_MODULE,
15866 +static int __init pfm_amd64_pmu_init_module(void)
15868 + return pfm_pmu_register(&pfm_amd64_pmu_conf);
15871 +static void __exit pfm_amd64_pmu_cleanup_module(void)
15873 + if (pfm_nb_sys_owners)
15874 + vfree(pfm_nb_sys_owners);
15876 + pfm_pmu_unregister(&pfm_amd64_pmu_conf);
15879 +module_init(pfm_amd64_pmu_init_module);
15880 +module_exit(pfm_amd64_pmu_cleanup_module);
15881 diff --git a/arch/x86/perfmon/perfmon_intel_arch.c b/arch/x86/perfmon/perfmon_intel_arch.c
15882 new file mode 100644
15883 index 0000000..e27a732
15885 +++ b/arch/x86/perfmon/perfmon_intel_arch.c
15888 + * This file contains the Intel architectural perfmon v1, v2, v3
15889 + * description tables.
15891 + * Architectural perfmon was introduced with Intel Core Solo/Duo
15894 + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
15895 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
15897 + * This program is free software; you can redistribute it and/or
15898 + * modify it under the terms of version 2 of the GNU General Public
15899 + * License as published by the Free Software Foundation.
15901 + * This program is distributed in the hope that it will be useful,
15902 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15903 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15904 + * General Public License for more details.
15906 + * You should have received a copy of the GNU General Public License
15907 + * along with this program; if not, write to the Free Software
15908 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
15911 +#include <linux/module.h>
15912 +#include <linux/kprobes.h>
15913 +#include <linux/perfmon_kern.h>
15914 +#include <linux/nmi.h>
15915 +#include <asm/msr.h>
15916 +#include <asm/apic.h>
15918 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
15919 +MODULE_DESCRIPTION("Intel architectural perfmon v1");
15920 +MODULE_LICENSE("GPL");
15922 +static int force, force_nmi;
15923 +MODULE_PARM_DESC(force, "bool: force module to load successfully");
15924 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
15925 +module_param(force, bool, 0600);
15926 +module_param(force_nmi, bool, 0600);
15928 +static u64 enable_mask[PFM_MAX_PMCS];
15929 +static u16 max_enable;
15932 + * - upper 32 bits are reserved
15933 + * - INT: APIC enable bit is reserved (forced to 1)
15934 + * - bit 21 is reserved
15936 + * RSVD: reserved bits are 1
15938 +#define PFM_IA_PMC_RSVD ((~((1ULL<<32)-1)) \
15943 + * force Local APIC interrupt on overflow
15944 + * disable with NO_EMUL64
15946 +#define PFM_IA_PMC_VAL (1ULL<<20)
15947 +#define PFM_IA_NO64 (1ULL<<20)
15950 + * the architecture specifies that:
15951 + * IA32_PMCx MSR : starts at 0x0c1 & occupies a contiguous block of MSRs
15952 + * IA32_PERFEVTSELx MSR : starts at 0x186 & occupies a contiguous block of MSRs
15953 + * MSR_GEN_FIXED_CTR0 : starts at 0x309 & occupies a contiguous block of MSRs
15955 +#define MSR_GEN_SEL_BASE MSR_P6_EVNTSEL0
15956 +#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0
15957 +#define MSR_GEN_FIXED_PMC_BASE MSR_CORE_PERF_FIXED_CTR0
15960 + * layout of EAX for CPUID.0xa leaf function
15963 + unsigned int version:8; /* architectural perfmon version */
15964 + unsigned int num_cnt:8; /* number of generic counters */
15965 + unsigned int cnt_width:8; /* width of generic counters */
15966 + unsigned int ebx_length:8; /* number of architected events */
15970 + * layout of EDX for CPUID.0xa leaf function when perfmon v2 is detected
15973 + unsigned int num_cnt:5; /* number of fixed counters */
15974 + unsigned int cnt_width:8; /* width of fixed counters */
15975 + unsigned int reserved:19;
15978 +static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx,
15979 + struct pfm_event_set *set);
15980 +static int pfm_intel_arch_stop_save(struct pfm_context *ctx,
15981 + struct pfm_event_set *set);
15982 +static int pfm_intel_arch_has_ovfls(struct pfm_context *ctx);
15983 +static void __kprobes pfm_intel_arch_quiesce(void);
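As a standalone illustration of how the CPUID.0xa layouts described above are typically decoded (the union and helper below are assumptions for this sketch; the patch uses its own pmu_eax/pmu_edx types):

union cpuid10_eax {
	struct {
		unsigned int version:8;		/* architectural perfmon version */
		unsigned int num_cnt:8;		/* number of generic counters */
		unsigned int cnt_width:8;	/* width of generic counters */
		unsigned int ebx_length:8;	/* number of architected events */
	} split;
	unsigned int full;
};

static void pfm_show_arch_perfmon_caps(void)
{
	union cpuid10_eax eax;
	unsigned int ebx, ecx, edx;

	cpuid(0xa, &eax.full, &ebx, &ecx, &edx);
	printk(KERN_INFO "arch perfmon v%u: %u generic counters, %u bits wide\n",
	       eax.split.version, eax.split.num_cnt, eax.split.cnt_width);
}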
15986 + * physical addresses of MSR controlling the perfevtsel and counter registers
15988 +struct pfm_arch_pmu_info pfm_intel_arch_pmu_info = {
15989 + .stop_save = pfm_intel_arch_stop_save,
15990 + .has_ovfls = pfm_intel_arch_has_ovfls,
15991 + .quiesce = pfm_intel_arch_quiesce,
15992 + .restore_pmcs = pfm_intel_arch_restore_pmcs
15995 +#define PFM_IA_C(n) { \
15996 + .type = PFM_REG_I64, \
15997 + .desc = "PERFEVTSEL"#n, \
15998 + .dfl_val = PFM_IA_PMC_VAL, \
15999 + .rsvd_msk = PFM_IA_PMC_RSVD, \
16000 + .no_emul64_msk = PFM_IA_NO64, \
16001 + .hw_addr = MSR_GEN_SEL_BASE+(n) \
16004 +#define PFM_IA_D(n) \
16005 + { .type = PFM_REG_C, \
16006 + .desc = "PMC"#n, \
16007 + .hw_addr = MSR_P6_PERFCTR0+n, \
16008 + .dep_pmcs[0] = 1ULL << n \
16011 +#define PFM_IA_FD(n) \
16012 + { .type = PFM_REG_C, \
16013 + .desc = "FIXED_CTR"#n, \
16014 + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\
16015 + .dep_pmcs[0] = 1ULL << 16 \
16018 +static struct pfm_regmap_desc pfm_intel_arch_pmc_desc[] = {
16019 +/* pmc0 */ PFM_IA_C(0), PFM_IA_C(1), PFM_IA_C(2), PFM_IA_C(3),
16020 +/* pmc4 */ PFM_IA_C(4), PFM_IA_C(5), PFM_IA_C(6), PFM_IA_C(7),
16021 +/* pmc8 */ PFM_IA_C(8), PFM_IA_C(9), PFM_IA_C(10), PFM_IA_C(11),
16022 +/* pmc12 */ PFM_IA_C(12), PFM_IA_C(13), PFM_IA_C(14), PFM_IA_C(15),
16024 +/* pmc16 */ { .type = PFM_REG_I,
16025 + .desc = "FIXED_CTRL",
16026 + .dfl_val = 0x8888888888888888ULL, /* force PMI */
16027 + .rsvd_msk = 0, /* set dynamically */
16028 + .no_emul64_msk = 0,
16029 + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
16032 +#define PFM_IA_MAX_PMCS ARRAY_SIZE(pfm_intel_arch_pmc_desc)
16034 +static struct pfm_regmap_desc pfm_intel_arch_pmd_desc[] = {
16035 +/* pmd0 */ PFM_IA_D(0), PFM_IA_D(1), PFM_IA_D(2), PFM_IA_D(3),
16036 +/* pmd4 */ PFM_IA_D(4), PFM_IA_D(5), PFM_IA_D(6), PFM_IA_D(7),
16037 +/* pmd8 */ PFM_IA_D(8), PFM_IA_D(9), PFM_IA_D(10), PFM_IA_D(11),
16038 +/* pmd12 */ PFM_IA_D(12), PFM_IA_D(13), PFM_IA_D(14), PFM_IA_D(15),
16040 +/* pmd16 */ PFM_IA_FD(0), PFM_IA_FD(1), PFM_IA_FD(2), PFM_IA_FD(3),
16041 +/* pmd20 */ PFM_IA_FD(4), PFM_IA_FD(5), PFM_IA_FD(6), PFM_IA_FD(7),
16042 +/* pmd24 */ PFM_IA_FD(8), PFM_IA_FD(9), PFM_IA_FD(10), PFM_IA_FD(11),
16043 +/* pmd28 */ PFM_IA_FD(16), PFM_IA_FD(17), PFM_IA_FD(18), PFM_IA_FD(19)
16045 +#define PFM_IA_MAX_PMDS ARRAY_SIZE(pfm_intel_arch_pmd_desc)
16047 +#define PFM_IA_MAX_CNT 16 /* # generic counters in mapping table */
16048 +#define PFM_IA_MAX_FCNT 16 /* # of fixed counters in mapping table */
16049 +#define PFM_IA_FCNT_BASE 16 /* base index of fixed counters PMD */
16051 +static struct pfm_pmu_config pfm_intel_arch_pmu_conf;
16053 +static void pfm_intel_arch_check_errata(void)
16056 + * Core Duo errata AE49 (no fix). Both counters share a single
16057 + * enable bit in PERFEVTSEL0
16059 + if (current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 14)
16060 + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_NO_SHARING;
16063 +static inline void set_enable_mask(unsigned int i)
16065 + __set_bit(i, cast_ulp(enable_mask));
16067 + /* max_enable = highest + 1 */
16068 + if ((i+1) > max_enable)
16069 + max_enable = i + 1;
16072 +static void pfm_intel_arch_setup_generic(unsigned int version,
16073 + unsigned int width,
16074 + unsigned int count)
16080 + * first we handle the generic counters:
16082 + * - ensure HW does not have more registers than hardcoded in the tables
16083 + * - adjust rsvd_msk to actual counter width
16084 + * - initialize enable_mask (list of PMC with start/stop capability)
16085 + * - mark unused hardcoded generic counters as unimplemented
16089 + * use the min of the number of HW counters and the number hardcoded in the tables
16091 + if (count >= PFM_IA_MAX_CNT) {
16092 + printk(KERN_INFO "perfmon: Limiting number of generic counters"
16093 + " to %u, HW supports %u",
16094 + PFM_IA_MAX_CNT, count);
16095 + count = PFM_IA_MAX_CNT;
16099 + * adjust rsvd_msk for generic counters based on actual width
16100 + * initialize enable_mask (1 per pmd)
16102 + rsvd = ~((1ULL << width)-1);
16103 + for (i = 0; i < count; i++) {
16104 + pfm_intel_arch_pmd_desc[i].rsvd_msk = rsvd;
16105 + set_enable_mask(i);
16109 + * handle version 3 new anythread bit (21)
16111 + if (version == 3) {
16112 + for (i = 0; i < count; i++)
16113 + pfm_intel_arch_pmc_desc[i].rsvd_msk &= ~(1ULL << 21);
16118 + * mark unused generic counters as not available
16120 + for (i = count ; i < PFM_IA_MAX_CNT; i++) {
16121 + pfm_intel_arch_pmd_desc[i].type = PFM_REG_NA;
16122 + pfm_intel_arch_pmc_desc[i].type = PFM_REG_NA;
16126 +static void pfm_intel_arch_setup_fixed(unsigned int version,
16127 + unsigned int width,
16128 + unsigned int count)
16134 + * handle the fixed counters (if any):
16136 + * - ensure HW does not have more registers than hardcoded in the tables
16137 + * - adjust rsvd_msk to actual counter width
16138 + * - initialize enable_mask (list of PMC with start/stop capability)
16139 + * - mark unused hardcoded fixed counters as unimplemented
16141 + if (count >= PFM_IA_MAX_FCNT) {
16142 + printk(KERN_INFO "perfmon: Limiting number of fixed counters"
16143 + " to %u, HW supports %u",
16144 + PFM_IA_MAX_FCNT, count);
16145 + count = PFM_IA_MAX_FCNT;
16148 + * adjust rsvd_msk for fixed counters based on actual width
16150 + rsvd = ~((1ULL << width)-1);
16151 + for (i = 0; i < count; i++)
16152 + pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].rsvd_msk = rsvd;
16155 + * handle version 3 new anythread bit (bit 2)
16157 + if (version == 3)
16158 + rsvd = 1ULL << 3;
16160 + rsvd = 3ULL << 2;
16162 + pfm_intel_arch_pmc_desc[16].rsvd_msk = 0;
16163 + for (i = 0; i < count; i++)
16164 + pfm_intel_arch_pmc_desc[16].rsvd_msk |= rsvd << (i<<2);
16167 + * mark unused fixed counters as unimplemented
16169 + * update the rsvd_msk, dfl_val in FIXED_CTRL:
16170 + * - rsvd_msk: set all 4 bits
16171 + * - dfl_val : clear all 4 bits
16173 + dfl = pfm_intel_arch_pmc_desc[16].dfl_val;
16174 + rsvd = pfm_intel_arch_pmc_desc[16].rsvd_msk;
16176 + for (i = count ; i < PFM_IA_MAX_FCNT; i++) {
16177 + pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].type = PFM_REG_NA;
16178 + rsvd |= 0xfULL << (i<<2);
16179 + dfl &= ~(0xfULL << (i<<2));
16183 + * FIXED_CTR_CTRL unavailable when no fixed counters are defined
16186 + pfm_intel_arch_pmc_desc[16].type = PFM_REG_NA;
16188 + /* update rsvd_mask and dfl_val */
16189 + pfm_intel_arch_pmc_desc[16].rsvd_msk = rsvd;
16190 + pfm_intel_arch_pmc_desc[16].dfl_val = dfl;
16191 + set_enable_mask(16);
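The per-counter shifts above follow from the FIXED_CTR_CTRL layout: each fixed counter owns a 4-bit field (enable bits, anythread on v3, PMI), so counter i lives at bits [4*i+3:4*i]. A small illustrative helper (an assumption, not part of the patch):

static inline u64 pfm_fixed_ctrl_field(unsigned int i, u64 bits)
{
	/* place a 4-bit control value into the field of fixed counter i */
	return (bits & 0xfULL) << (i << 2);	/* i << 2 == i * 4 */
}

For example, the default value 0x8888888888888888ULL used above simply sets bit 3 (PMI on overflow) in every 4-bit field.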
16195 +static int pfm_intel_arch_probe_pmu(void)
16198 + unsigned int val;
16199 + struct pmu_eax eax;
16200 + struct pmu_edx edx;
16202 + unsigned int ebx, ecx;
16203 + unsigned int width = 0;
16207 + if (!(cpu_has_arch_perfmon || force)) {
16208 + PFM_INFO("no support for Intel architectural PMU");
16212 + if (!cpu_has_apic) {
16213 + PFM_INFO("no Local APIC, try rebooting with lapic option");
16217 + /* cpuid() call protected by cpu_has_arch_perfmon */
16218 + cpuid(0xa, &eax.val, &ebx, &ecx, &edx.val);
16221 + * reject processors supported by perfmon_intel_core
16223 + * We need to do this explicitly to avoid depending
16224 + * on the link order in case the modules are compiled as
16227 + * non-Intel processors are rejected by cpu_has_arch_perfmon
16229 + if (current_cpu_data.x86 == 6 && !force) {
16230 + switch (current_cpu_data.x86_model) {
16231 + case 15: /* Merom: use perfmon_intel_core */
16232 + case 23: /* Penryn: use perfmon_intel_core */
16240 + * some 6/15 models have a buggy BIOS
16242 + if (eax.eax.version == 0
16243 + && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) {
16244 + PFM_INFO("buggy v2 BIOS, adjusting for 2 generic counters");
16245 + eax.eax.version = 2;
16246 + eax.eax.num_cnt = 2;
16247 + eax.eax.cnt_width = 40;
16251 + * Intel Atom processors have buggy firmware which does not report
16252 + * the correct number of fixed counters
16254 + if (eax.eax.version == 3 && edx.edx.num_cnt < 3
16255 + && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 28) {
16256 + PFM_INFO("buggy v3 BIOS, adjusting for 3 fixed counters");
16257 + edx.edx.num_cnt = 3;
16261 + * some v2 BIOSes are incomplete
16263 + if (eax.eax.version == 2 && !edx.edx.num_cnt) {
16264 + PFM_INFO("buggy v2 BIOS, adjusting for 3 fixed counters");
16265 + edx.edx.num_cnt = 3;
16266 + edx.edx.cnt_width = 40;
16270 + * no fixed counters on earlier versions
16272 + if (eax.eax.version < 2) {
16276 + * use the min value of both widths until we support
16277 + * variable width counters
16279 + width = eax.eax.cnt_width < edx.edx.cnt_width ?
16280 + eax.eax.cnt_width : edx.edx.cnt_width;
16283 + PFM_INFO("detected architecural perfmon v%d", eax.eax.version);
16284 + PFM_INFO("num_gen=%d width=%d num_fixed=%d width=%d",
16286 + eax.eax.cnt_width,
16288 + edx.edx.cnt_width);
16291 + pfm_intel_arch_setup_generic(eax.eax.version,
16293 + eax.eax.num_cnt);
16295 + pfm_intel_arch_setup_fixed(eax.eax.version,
16297 + edx.edx.num_cnt);
16300 + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_USE_NMI;
16302 + pfm_intel_arch_check_errata();
16308 + * pfm_intel_arch_has_ovfls - check for pending overflow condition
16309 + * @ctx: context to work on
16311 + * detect if counters have overflowed.
16313 + * 0 : no overflow
16314 + * 1 : at least one overflow
16316 +static int __kprobes pfm_intel_arch_has_ovfls(struct pfm_context *ctx)
16322 + cnt_mask = ctx->regs.cnt_pmds;
16323 + num = ctx->regs.num_counters;
16324 + wmask = 1ULL << pfm_pmu_conf->counter_width;
16327 + * we can leverage the fact that we know the mapping
16328 + * to hardcode the MSR address and avoid accessing
16329 + * more cachelines
16331 + * We need to check cnt_mask because not all registers
16332 + * may be available.
16334 + for (i = 0; num; i++) {
16335 + if (test_bit(i, cast_ulp(cnt_mask))) {
16336 + rdmsrl(pfm_intel_arch_pmd_desc[i].hw_addr, val);
16337 + if (!(val & wmask))
16345 +static int pfm_intel_arch_stop_save(struct pfm_context *ctx,
16346 + struct pfm_event_set *set)
16348 + u64 used_mask[PFM_PMC_BV];
16350 + u64 val, wmask, ovfl_mask;
16353 + wmask = 1ULL << pfm_pmu_conf->counter_width;
16355 + bitmap_and(cast_ulp(used_mask),
16356 + cast_ulp(set->used_pmcs),
16357 + cast_ulp(enable_mask),
16360 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
16363 + * stop monitoring
16364 + * Unfortunately, this is very expensive!
16365 + * wrmsrl() is serializing.
16367 + for (i = 0; count; i++) {
16368 + if (test_bit(i, cast_ulp(used_mask))) {
16369 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
16375 + * if we already have a pending overflow condition, we simply
16376 + * return to take care of it first.
16378 + if (set->npend_ovfls)
16381 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
16382 + cnt_pmds = ctx->regs.cnt_pmds;
16385 + * check for pending overflows and save PMDs (combo)
16386 + * we employ used_pmds because we also need to save
16387 + * and not just check for pending interrupts.
16389 + * Must check for counting PMDs because of virtual PMDs
16391 + count = set->nused_pmds;
16392 + for (i = 0; count; i++) {
16393 + if (test_bit(i, cast_ulp(set->used_pmds))) {
16394 + val = pfm_arch_read_pmd(ctx, i);
16395 + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
16396 + if (!(val & wmask)) {
16397 + __set_bit(i, cast_ulp(set->povfl_pmds));
16398 + set->npend_ovfls++;
16400 + val = (set->pmds[i].value & ~ovfl_mask)
16401 + | (val & ovfl_mask);
16403 + set->pmds[i].value = val;
16407 + /* 0 means: no need to save PMDs at upper level */
16412 + * pfm_intel_arch_quiesce - stop monitoring without grabbing any lock
16414 + * called from NMI interrupt handler to immediately stop monitoring
16415 + * cannot grab any lock, including perfmon related locks
16417 +static void __kprobes pfm_intel_arch_quiesce(void)
16422 + * PMC16 is the fixed counter control register, so it has a
16423 + * distinct MSR address
16425 + * We do not use the hw_addr field in the table to avoid touching
16426 + * too many cachelines
16428 + for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) {
16429 + if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) {
16431 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
16433 + wrmsrl(MSR_P6_EVNTSEL0+i, 0);
16439 + * pfm_intel_arch_restore_pmcs - reload PMC registers
16440 + * @ctx: context to restore from
16441 + * @set: current event set
16443 + * optimized version of pfm_arch_restore_pmcs(). On architectural perfmon,
16444 + * we can afford to restore only the PMC registers we use, because they
16445 + * are all independent of each other.
16447 +static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx,
16448 + struct pfm_event_set *set)
16453 + mask = set->used_pmcs;
16454 + num = set->nused_pmcs;
16455 + for (i = 0; num; i++) {
16456 + if (test_bit(i, cast_ulp(mask))) {
16457 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]);
16463 + * Counters may have a model-specific width. Yet the documentation says
16464 + * that only the lower 32 bits can be written to due to the specification
16465 + * of wrmsr. Bits [32..(w-1)] are sign extensions of bit 31. Bits [w..63] must
16466 + * not be set (see rsvd_msk for PMDs). As such, the effective width of a
16467 + * counter is only 31 bits, regardless of what CPUID.0xa returns.
16469 + * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18
16471 +static struct pfm_pmu_config pfm_intel_arch_pmu_conf = {
16472 + .pmu_name = "Intel architectural",
16473 + .pmd_desc = pfm_intel_arch_pmd_desc,
16474 + .counter_width = 31,
16475 + .num_pmc_entries = PFM_IA_MAX_PMCS,
16476 + .num_pmd_entries = PFM_IA_MAX_PMDS,
16477 + .pmc_desc = pfm_intel_arch_pmc_desc,
16478 + .probe_pmu = pfm_intel_arch_probe_pmu,
16479 + .version = "1.0",
16480 + .flags = PFM_PMU_BUILTIN_FLAG,
16481 + .owner = THIS_MODULE,
16482 + .pmu_info = &pfm_intel_arch_pmu_info
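Given counter_width = 31 above, the pending-overflow test used by has_ovfls() and stop_save() reduces to checking bit 31: an armed counter is programmed with that bit set, and it reads back as 0 once the counter wraps. A minimal illustration (a sketch, not part of the patch):

static inline int pfm_counter_wrapped(u64 val, unsigned int counter_width)
{
	u64 wmask = 1ULL << counter_width;

	/* armed counters keep this bit set until they wrap past 2^counter_width */
	return !(val & wmask);
}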
16485 +static int __init pfm_intel_arch_pmu_init_module(void)
16487 + return pfm_pmu_register(&pfm_intel_arch_pmu_conf);
16490 +static void __exit pfm_intel_arch_pmu_cleanup_module(void)
16492 + pfm_pmu_unregister(&pfm_intel_arch_pmu_conf);
16495 +module_init(pfm_intel_arch_pmu_init_module);
16496 +module_exit(pfm_intel_arch_pmu_cleanup_module);
16497 diff --git a/arch/x86/perfmon/perfmon_intel_atom.c b/arch/x86/perfmon/perfmon_intel_atom.c
16498 new file mode 100644
16499 index 0000000..9b94863
16501 +++ b/arch/x86/perfmon/perfmon_intel_atom.c
16504 + * perfmon support for Intel Atom (architectural perfmon v3 + PEBS)
16506 + * Copyright (c) 2008 Google,Inc
16507 + * Contributed by Stephane Eranian <eranian@gmail.com>
16509 + * This program is free software; you can redistribute it and/or
16510 + * modify it under the terms of version 2 of the GNU General Public
16511 + * License as published by the Free Software Foundation.
16513 + * This program is distributed in the hope that it will be useful,
16514 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16515 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16516 + * General Public License for more details.
16518 + * You should have received a copy of the GNU General Public License
16519 + * along with this program; if not, write to the Free Software
16520 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
16523 +#include <linux/module.h>
16524 +#include <linux/kprobes.h>
16525 +#include <linux/perfmon_kern.h>
16526 +#include <asm/msr.h>
16528 +MODULE_AUTHOR("Stephane Eranian <eranian@gmail.com>");
16529 +MODULE_DESCRIPTION("Intel Atom");
16530 +MODULE_LICENSE("GPL");
16532 +static int force, force_nmi;
16533 +MODULE_PARM_DESC(force, "bool: force module to load successfully");
16534 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
16535 +module_param(force, bool, 0600);
16536 +module_param(force_nmi, bool, 0600);
16539 + * - upper 32 bits are reserved
16540 + * - INT: APIC enable bit is reserved (forced to 1)
16542 + * RSVD: reserved bits are 1
16544 +#define PFM_ATOM_PMC_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20))
16547 + * force Local APIC interrupt on overflow
16548 + * disable with NO_EMUL64
16550 +#define PFM_ATOM_PMC_VAL (1ULL<<20)
16551 +#define PFM_ATOM_NO64 (1ULL<<20)
16554 + * Atom counters are 40 bits wide. All 40 bits can be read, but only 31 can be
16555 + * written to due to a limitation of wrmsr. Bits [63:32] are sign extensions of bit 31.
16556 + * Bits [63:40] must not be set.
16558 + * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18
16560 +#define PFM_ATOM_PMD_WIDTH 31
16561 +#define PFM_ATOM_PMD_RSVD ~((1ULL << 40)-1)
16563 +static void pfm_intel_atom_acquire_pmu_percpu(void);
16564 +static void pfm_intel_atom_release_pmu_percpu(void);
16565 +static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx,
16566 + struct pfm_event_set *set);
16567 +static int pfm_intel_atom_stop_save(struct pfm_context *ctx,
16568 + struct pfm_event_set *set);
16569 +static int pfm_intel_atom_has_ovfls(struct pfm_context *ctx);
16570 +static void __kprobes pfm_intel_atom_quiesce(void);
16572 +struct pfm_arch_pmu_info pfm_intel_atom_pmu_info = {
16573 + .stop_save = pfm_intel_atom_stop_save,
16574 + .has_ovfls = pfm_intel_atom_has_ovfls,
16575 + .quiesce = pfm_intel_atom_quiesce,
16576 + .restore_pmcs = pfm_intel_atom_restore_pmcs,
16577 + .acquire_pmu_percpu = pfm_intel_atom_acquire_pmu_percpu,
16578 + .release_pmu_percpu = pfm_intel_atom_release_pmu_percpu
16582 +#define PFM_ATOM_C(n) { \
16583 + .type = PFM_REG_I64, \
16584 + .desc = "PERFEVTSEL"#n, \
16585 + .dfl_val = PFM_ATOM_PMC_VAL, \
16586 + .rsvd_msk = PFM_ATOM_PMC_RSVD, \
16587 + .no_emul64_msk = PFM_ATOM_NO64, \
16588 + .hw_addr = MSR_P6_EVNTSEL0 + (n) \
16592 +static struct pfm_regmap_desc pfm_intel_atom_pmc_desc[] = {
16593 +/* pmc0 */ PFM_ATOM_C(0),
16594 +/* pmc1 */ PFM_ATOM_C(1),
16595 +/* pmc2 */ PMX_NA, PMX_NA,
16596 +/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16597 +/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16598 +/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16599 +/* pmc16 */ { .type = PFM_REG_I,
16600 + .desc = "FIXED_CTRL",
16601 + .dfl_val = 0x0000000000000888ULL, /* force PMI */
16602 + .rsvd_msk = 0xfffffffffffffcccULL, /* 3 fixed counters defined */
16603 + .no_emul64_msk = 0,
16604 + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
16606 +/* pmc17 */{ .type = PFM_REG_W,
16607 + .desc = "PEBS_ENABLE",
16609 + .rsvd_msk = 0xfffffffffffffffeULL,
16610 + .no_emul64_msk = 0,
16611 + .hw_addr = MSR_IA32_PEBS_ENABLE
16614 +#define PFM_ATOM_MAX_PMCS ARRAY_SIZE(pfm_intel_atom_pmc_desc)
16616 +#define PFM_ATOM_D(n) \
16617 + { .type = PFM_REG_C, \
16618 + .desc = "PMC"#n, \
16619 + .rsvd_msk = PFM_ATOM_PMD_RSVD, \
16620 + .hw_addr = MSR_P6_PERFCTR0+n, \
16621 + .dep_pmcs[0] = 1ULL << n \
16624 +#define PFM_ATOM_FD(n) \
16625 + { .type = PFM_REG_C, \
16626 + .desc = "FIXED_CTR"#n, \
16627 + .rsvd_msk = PFM_ATOM_PMD_RSVD, \
16628 + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\
16629 + .dep_pmcs[0] = 1ULL << 16 \
16632 +static struct pfm_regmap_desc pfm_intel_atom_pmd_desc[] = {
16633 +/* pmd0 */ PFM_ATOM_D(0),
16634 +/* pmd1 */ PFM_ATOM_D(1),
16635 +/* pmd2 */ PMX_NA,
16636 +/* pmd3 */ PMX_NA,
16637 +/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16638 +/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16639 +/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16640 +/* pmd16 */ PFM_ATOM_FD(0),
16641 +/* pmd17 */ PFM_ATOM_FD(1),
16642 +/* pmd18 */ PFM_ATOM_FD(2)
16644 +#define PFM_ATOM_MAX_PMDS ARRAY_SIZE(pfm_intel_atom_pmd_desc)
16646 +static struct pfm_pmu_config pfm_intel_atom_pmu_conf;
16648 +static int pfm_intel_atom_probe_pmu(void)
16653 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL)
16656 + if (current_cpu_data.x86 != 6)
16659 + if (current_cpu_data.x86_model != 28)
16663 + * having an APIC is mandatory, so disregard the force option
16665 + if (!cpu_has_apic) {
16666 + PFM_INFO("no Local APIC, try rebooting with lapic option");
16670 + PFM_INFO("detected Intel Atom PMU");
16673 + pfm_intel_atom_pmu_info.flags |= PFM_X86_FL_USE_NMI;
16679 + * pfm_intel_atom_has_ovfls - check for pending overflow condition
16680 + * @ctx: context to work on
16682 + * detect if counters have overflowed.
16684 + * 0 : no overflow
16685 + * 1 : at least one overflow
16687 +static int __kprobes pfm_intel_atom_has_ovfls(struct pfm_context *ctx)
16689 + struct pfm_regmap_desc *d;
16692 + d = pfm_pmu_conf->pmd_desc;
16694 + * read global overflow status register
16695 + * if sharing the PMU, then not all bits are ours, so we must
16696 + * check only the ones we actually use
16698 + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf);
16701 + * for pmd0, we also check PEBS overflow on bit 62
16703 + if ((d[0].type & PFM_REG_I) && (ovf & ((1ull << 62) | 1ull)))
16706 + if ((d[1].type & PFM_REG_I) && (ovf & 2ull))
16709 + if ((d[16].type & PFM_REG_I) && (ovf & (1ull << 32)))
16712 + if ((d[17].type & PFM_REG_I) && (ovf & (2ull << 32)))
16715 + if ((d[18].type & PFM_REG_I) && (ovf & (4ull << 32)))
16722 + * pfm_intel_atom_stop_save - stop monitoring, collect pending overflow, save pmds
16723 + * @ctx: context to work on
16724 + * @set: active set
16727 + * 1: caller needs to save pmds
16728 + * 0: caller does not need to save pmds, they have been saved by this call
16730 +static int pfm_intel_atom_stop_save(struct pfm_context *ctx,
16731 + struct pfm_event_set *set)
16733 +#define PFM_ATOM_WMASK (1ULL << 31)
16734 +#define PFM_ATOM_OMASK ((1ULL << 31)-1)
16735 + u64 clear_ovf = 0;
16736 + u64 ovf, ovf2, val;
16739 + * read global overflow status register
16740 + * if sharing the PMU, then not all bits are ours, so we must
16741 + * check only the ones we actually use.
16743 + * XXX: Atom seems to have a bug with the stickiness of
16744 + * GLOBAL_STATUS. If we read GLOBAL_STATUS after we
16745 + * clear the generic counters, then their bits in
16746 + * GLOBAL_STATUS are cleared. This should not be the
16747 + * case according to the architected PMU. To work around
16748 + * the problem, we read GLOBAL_STATUS BEFORE we stop
16749 + * all monitoring.
16751 + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf);
16754 + * stop monitoring
16756 + if (test_bit(0, cast_ulp(set->used_pmcs)))
16757 + wrmsrl(MSR_P6_EVNTSEL0, 0);
16759 + if (test_bit(1, cast_ulp(set->used_pmcs)))
16760 + wrmsrl(MSR_P6_EVNTSEL1, 0);
16762 + if (test_bit(16, cast_ulp(set->used_pmcs)))
16763 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
16765 + if (test_bit(17, cast_ulp(set->used_pmcs)))
16766 + wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
16769 + * XXX: related to bug mentioned above
16771 + * read GLOBAL_STATUS again to avoid race condition
16772 + * with overflows happening after first read and
16773 + * before stop. That avoids missing overflows on
16774 + * the fixed counters and PEBS
16776 + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf2);
16780 + * if we already have a pending overflow condition, we simply
16781 + * return to take care of it first.
16783 + if (set->npend_ovfls)
16787 + * check PMD 0,1,16,17,18 for overflow and save their value
16789 + if (test_bit(0, cast_ulp(set->used_pmds))) {
16790 + rdmsrl(MSR_P6_PERFCTR0, val);
16791 + if (ovf & ((1ull<<62)|1ull)) {
16792 + __set_bit(0, cast_ulp(set->povfl_pmds));
16793 + set->npend_ovfls++;
16794 + clear_ovf = (1ull << 62) | 1ull;
16796 + val = (set->pmds[0].value & ~PFM_ATOM_OMASK)
16797 + | (val & PFM_ATOM_OMASK);
16798 + set->pmds[0].value = val;
16801 + if (test_bit(1, cast_ulp(set->used_pmds))) {
16802 + rdmsrl(MSR_P6_PERFCTR1, val);
16803 + if (ovf & 2ull) {
16804 + __set_bit(1, cast_ulp(set->povfl_pmds));
16805 + set->npend_ovfls++;
16806 + clear_ovf |= 2ull;
16808 + val = (set->pmds[1].value & ~PFM_ATOM_OMASK)
16809 + | (val & PFM_ATOM_OMASK);
16810 + set->pmds[1].value = val;
16813 + if (test_bit(16, cast_ulp(set->used_pmds))) {
16814 + rdmsrl(MSR_CORE_PERF_FIXED_CTR0, val);
16815 + if (ovf & (1ull << 32)) {
16816 + __set_bit(16, cast_ulp(set->povfl_pmds));
16817 + set->npend_ovfls++;
16818 + clear_ovf |= 1ull << 32;
16820 + val = (set->pmds[16].value & ~PFM_ATOM_OMASK)
16821 + | (val & PFM_ATOM_OMASK);
16822 + set->pmds[16].value = val;
16825 + if (test_bit(17, cast_ulp(set->used_pmds))) {
16826 + rdmsrl(MSR_CORE_PERF_FIXED_CTR0+1, val);
16827 + if (ovf & (2ull << 32)) {
16828 + __set_bit(17, cast_ulp(set->povfl_pmds));
16829 + set->npend_ovfls++;
16830 + clear_ovf |= 2ull << 32;
16832 + val = (set->pmds[17].value & ~PFM_ATOM_OMASK)
16833 + | (val & PFM_ATOM_OMASK);
16834 + set->pmds[17].value = val;
16837 + if (test_bit(18, cast_ulp(set->used_pmds))) {
16838 + rdmsrl(MSR_CORE_PERF_FIXED_CTR0+2, val);
16839 + if (ovf & (4ull << 32)) {
16840 + __set_bit(18, cast_ulp(set->povfl_pmds));
16841 + set->npend_ovfls++;
16842 + clear_ovf |= 4ull << 32;
16844 + val = (set->pmds[18].value & ~PFM_ATOM_OMASK)
16845 + | (val & PFM_ATOM_OMASK);
16846 + set->pmds[18].value = val;
16850 + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf);
16852 + /* 0 means: no need to save PMDs at upper level */
16857 + * pfm_intel_atom_quiesce - stop monitoring without grabbing any lock
16859 + * called from NMI interrupt handler to immediately stop monitoring
16860 + * cannot grab any lock, including perfmon related locks
16862 +static void __kprobes pfm_intel_atom_quiesce(void)
16865 + * quiesce PMU by clearing available registers that have
16866 + * the start/stop capability
16868 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
16869 + wrmsrl(MSR_P6_EVNTSEL0, 0);
16871 + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
16872 + wrmsrl(MSR_P6_EVNTSEL1, 0);
16874 + if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
16875 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
16877 + if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
16878 + wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
16882 + * pfm_intel_atom_restore_pmcs - reload PMC registers
16883 + * @ctx: context to restore from
16884 + * @set: current event set
16886 + * restores PMCs and also the PEBS Debug Store (DS) area pointer
16888 +static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx,
16889 + struct pfm_event_set *set)
16891 + struct pfm_arch_context *ctx_arch;
16892 + u64 clear_ovf = 0;
16894 + ctx_arch = pfm_ctx_arch(ctx);
16896 + * must restore DS pointer before restoring PMCs
16897 + * as this can potentially reactivate monitoring
16899 + if (ctx_arch->flags.use_ds)
16900 + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
16902 + if (test_bit(0, cast_ulp(set->used_pmcs))) {
16903 + wrmsrl(MSR_P6_EVNTSEL0, set->pmcs[0]);
16904 + clear_ovf = 1ull;
16907 + if (test_bit(1, cast_ulp(set->used_pmcs))) {
16908 + wrmsrl(MSR_P6_EVNTSEL1, set->pmcs[1]);
16909 + clear_ovf |= 2ull;
16912 + if (test_bit(16, cast_ulp(set->used_pmcs))) {
16913 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, set->pmcs[16]);
16914 + clear_ovf |= 7ull << 32;
16917 + if (test_bit(17, cast_ulp(set->used_pmcs))) {
16918 + wrmsrl(MSR_IA32_PEBS_ENABLE, set->pmcs[17]);
16919 + clear_ovf |= 1ull << 62;
16923 + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf);
16926 +static int pfm_intel_atom_pmc17_check(struct pfm_context *ctx,
16927 + struct pfm_event_set *set,
16928 + struct pfarg_pmc *req)
16930 + struct pfm_arch_context *ctx_arch;
16931 + ctx_arch = pfm_ctx_arch(ctx);
16934 + * if user activates PEBS_ENABLE, then we need to have a valid
16935 + * DS Area setup. This only happens when the PEBS sampling format is
16936 + * used in which case PFM_X86_USE_PEBS is set. We must reject all other
16939 + * Otherwise we may pick up stale MSR_IA32_DS_AREA values. It appears
16940 + * that a value of 0 for this MSR does crash the system with
16943 + if (!ctx_arch->flags.use_pebs && req->reg_value) {
16944 + PFM_DBG("pmc17 useable only with a PEBS sampling format");
16950 +DEFINE_PER_CPU(u64, saved_global_ctrl);
16953 + * pfm_intel_atom_acquire_pmu_percpu - acquire PMU resource per CPU
16955 + * For Atom, it is necessary to enable all available
16956 + * registers. The firmware rightfully has the fixed counters
16957 + * disabled for backward compatibility with architectural perfmon
16960 + * This function is invoked on each online CPU
16962 +static void pfm_intel_atom_acquire_pmu_percpu(void)
16964 + struct pfm_regmap_desc *d;
16969 + * build bitmask of registers that are available to
16970 + * us. In some cases, there may be fewer registers than
16971 + * what Atom supports due to sharing with other kernel
16972 + * subsystems, such as the NMI watchdog
16974 + d = pfm_pmu_conf->pmd_desc;
16975 + for (i = 0; i < 16; i++) {
16976 + if ((d[i].type & PFM_REG_I) == 0)
16978 + mask |= 1ull << i;
16980 + for (i = 16; i < PFM_ATOM_MAX_PMDS; i++) {
16981 + if ((d[i].type & PFM_REG_I) == 0)
16983 + mask |= 1ull << (32+i-16);
16987 + * keep a local copy of the current MSR_CORE_PERF_GLOBAL_CTRL
16989 + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl));
16991 + PFM_DBG("global=0x%llx set to 0x%llx",
16992 + __get_cpu_var(saved_global_ctrl),
16996 + * enable all registers
16998 + * No need to quiesce the PMU. If there is an overflow, it will be
16999 + * treated as spurious by the handler
17001 + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, mask);
17005 + * pfm_intel_atom_release_pmu_percpu - release PMU resource per CPU
17007 + * For Atom, we restore MSR_CORE_PERF_GLOBAL_CTRL to its original value
17009 +static void pfm_intel_atom_release_pmu_percpu(void)
17011 + PFM_DBG("global_ctrl restored to 0x%llx\n",
17012 + __get_cpu_var(saved_global_ctrl));
17014 + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl));
17017 +static struct pfm_pmu_config pfm_intel_atom_pmu_conf = {
17018 + .pmu_name = "Intel Atom",
17019 + .pmd_desc = pfm_intel_atom_pmd_desc,
17020 + .counter_width = PFM_ATOM_PMD_WIDTH,
17021 + .num_pmc_entries = PFM_ATOM_MAX_PMCS,
17022 + .num_pmd_entries = PFM_ATOM_MAX_PMDS,
17023 + .pmc_desc = pfm_intel_atom_pmc_desc,
17024 + .probe_pmu = pfm_intel_atom_probe_pmu,
17025 + .version = "1.0",
17026 + .flags = PFM_PMU_BUILTIN_FLAG,
17027 + .owner = THIS_MODULE,
17028 + .pmc_write_check = pfm_intel_atom_pmc17_check,
17029 + .pmu_info = &pfm_intel_atom_pmu_info
17032 +static int __init pfm_intel_atom_pmu_init_module(void)
17034 + return pfm_pmu_register(&pfm_intel_atom_pmu_conf);
17037 +static void __exit pfm_intel_atom_pmu_cleanup_module(void)
17039 + pfm_pmu_unregister(&pfm_intel_atom_pmu_conf);
17042 +module_init(pfm_intel_atom_pmu_init_module);
17043 +module_exit(pfm_intel_atom_pmu_cleanup_module);
17044 diff --git a/arch/x86/perfmon/perfmon_intel_core.c b/arch/x86/perfmon/perfmon_intel_core.c
17045 new file mode 100644
17046 index 0000000..fddc436
17048 +++ b/arch/x86/perfmon/perfmon_intel_core.c
17051 + * This file contains the Intel Core PMU registers description tables.
17052 + * Intel Core-based processors support architectural perfmon v2 + PEBS
17054 + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
17055 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
17057 +#include <linux/module.h>
17058 +#include <linux/kprobes.h>
17059 +#include <linux/perfmon_kern.h>
17060 +#include <linux/nmi.h>
17062 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
17063 +MODULE_DESCRIPTION("Intel Core");
17064 +MODULE_LICENSE("GPL");
17066 +static int force_nmi;
17067 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
17068 +module_param(force_nmi, bool, 0600);
17071 + * - upper 32 bits are reserved
17072 + * - INT: APIC enable bit is reserved (forced to 1)
17073 + * - bit 21 is reserved
17075 + * RSVD: reserved bits must be 1
17077 +#define PFM_CORE_PMC_RSVD ((~((1ULL<<32)-1)) \
17082 + * Core counters are 40-bits
17084 +#define PFM_CORE_CTR_RSVD (~((1ULL<<40)-1))
17087 + * force Local APIC interrupt on overflow
17088 + * disable with NO_EMUL64
17090 +#define PFM_CORE_PMC_VAL (1ULL<<20)
17091 +#define PFM_CORE_NO64 (1ULL<<20)
17093 +#define PFM_CORE_NA { .reg_type = PFM_REGT_NA}
17095 +#define PFM_CORE_CA(m, c, t) \
17102 +struct pfm_ds_area_intel_core {
17103 + u64 bts_buf_base;
17106 + u64 bts_intr_thres;
17107 + u64 pebs_buf_base;
17109 + u64 pebs_abs_max;
17110 + u64 pebs_intr_thres;
17111 + u64 pebs_cnt_reset;
17114 +static void pfm_core_restore_pmcs(struct pfm_context *ctx,
17115 + struct pfm_event_set *set);
17116 +static int pfm_core_has_ovfls(struct pfm_context *ctx);
17117 +static int pfm_core_stop_save(struct pfm_context *ctx,
17118 + struct pfm_event_set *set);
17119 +static void __kprobes pfm_core_quiesce(void);
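The DS save area laid out above is made visible to the CPU through MSR_IA32_DS_AREA; a hedged sketch of that step (the helper is an assumption; the Atom restore path earlier does the equivalent write via ctx_arch->ds_area):

static inline void pfm_program_ds_area(struct pfm_ds_area_intel_core *ds)
{
	/* DS_AREA holds the linear address of the DS buffer management area */
	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}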
17121 +static u64 enable_mask[PFM_MAX_PMCS];
17122 +static u16 max_enable;
17124 +struct pfm_arch_pmu_info pfm_core_pmu_info = {
17125 + .stop_save = pfm_core_stop_save,
17126 + .has_ovfls = pfm_core_has_ovfls,
17127 + .quiesce = pfm_core_quiesce,
17128 + .restore_pmcs = pfm_core_restore_pmcs
17131 +static struct pfm_regmap_desc pfm_core_pmc_desc[] = {
17133 + .type = PFM_REG_I64,
17134 + .desc = "PERFEVTSEL0",
17135 + .dfl_val = PFM_CORE_PMC_VAL,
17136 + .rsvd_msk = PFM_CORE_PMC_RSVD,
17137 + .no_emul64_msk = PFM_CORE_NO64,
17138 + .hw_addr = MSR_P6_EVNTSEL0
17141 + .type = PFM_REG_I64,
17142 + .desc = "PERFEVTSEL1",
17143 + .dfl_val = PFM_CORE_PMC_VAL,
17144 + .rsvd_msk = PFM_CORE_PMC_RSVD,
17145 + .no_emul64_msk = PFM_CORE_NO64,
17146 + .hw_addr = MSR_P6_EVNTSEL1
17148 +/* pmc2 */ PMX_NA, PMX_NA,
17149 +/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17150 +/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17151 +/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17152 +/* pmc16 */ { .type = PFM_REG_I,
17153 + .desc = "FIXED_CTRL",
17154 + .dfl_val = 0x888ULL,
17155 + .rsvd_msk = 0xfffffffffffffcccULL,
17156 + .no_emul64_msk = 0,
17157 + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
17159 +/* pmc17 */ { .type = PFM_REG_W,
17160 + .desc = "PEBS_ENABLE",
17162 + .rsvd_msk = 0xfffffffffffffffeULL,
17163 + .no_emul64_msk = 0,
17164 + .hw_addr = MSR_IA32_PEBS_ENABLE
17168 +#define PFM_CORE_D(n) \
17169 + { .type = PFM_REG_C, \
17170 + .desc = "PMC"#n, \
17171 + .rsvd_msk = PFM_CORE_CTR_RSVD, \
17172 + .hw_addr = MSR_P6_PERFCTR0+n, \
17173 + .dep_pmcs[0] = 1ULL << n \
17176 +#define PFM_CORE_FD(n) \
17177 + { .type = PFM_REG_C, \
17178 + .desc = "FIXED_CTR"#n, \
17179 + .rsvd_msk = PFM_CORE_CTR_RSVD, \
17180 + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\
17181 + .dep_pmcs[0] = 1ULL << 16 \
17184 +static struct pfm_regmap_desc pfm_core_pmd_desc[] = {
17185 +/* pmd0 */ PFM_CORE_D(0),
17186 +/* pmd1 */ PFM_CORE_D(1),
17187 +/* pmd2 */ PMX_NA, PMX_NA,
17188 +/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17189 +/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17190 +/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17191 +/* pmd16 */ PFM_CORE_FD(0),
17192 +/* pmd17 */ PFM_CORE_FD(1),
17193 +/* pmd18 */ PFM_CORE_FD(2)
17195 +#define PFM_CORE_NUM_PMCS ARRAY_SIZE(pfm_core_pmc_desc)
17196 +#define PFM_CORE_NUM_PMDS ARRAY_SIZE(pfm_core_pmd_desc)
17198 +static struct pfm_pmu_config pfm_core_pmu_conf;
17200 +static int pfm_core_probe_pmu(void)
17203 + * Check for Intel Core processors explicitly.
17204 + * Checking for cpu_has_perfmon is not enough as this
17205 + * matches Intel Core Duo/Core Solo but none supports
17208 + * Intel Core = arch perfmon v2 + PEBS
17210 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
17211 + PFM_INFO("not an Intel processor");
17215 + if (current_cpu_data.x86 != 6)
17218 + switch (current_cpu_data.x86_model) {
17219 + case 15: /* Merom */
17221 + case 23: /* Penryn */
17223 + case 29: /* Dunnington */
17229 + if (!cpu_has_apic) {
17230 + PFM_INFO("no Local APIC, unsupported");
17234 + PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d",
17235 + nmi_watchdog, atomic_read(&nmi_active), force_nmi);
17238 + * Intel Core processors implement DS and PEBS, no need to check
17240 + if (cpu_has_pebs)
17241 + PFM_INFO("PEBS supported, enabled");
17244 + * initialize bitmask of registers with enable capability, i.e.,
17245 + * start/stop. This is used to restrict the number of registers to
17246 + * touch on start/stop
17247 + * max_enable: number of bits to scan in enable_mask = highest + 1
17249 + * may be adjusted in pfm_arch_pmu_acquire()
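+ *
+ * On Core the registers with an enable capability are pmc0/pmc1
+ * (PERFEVTSEL0/1), pmc16 (FIXED_CTRL) and pmc17 (PEBS_ENABLE), hence
+ * bits 0, 1, 16 and 17 below and max_enable = 18.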
17251 + __set_bit(0, cast_ulp(enable_mask));
17252 + __set_bit(1, cast_ulp(enable_mask));
17253 + __set_bit(16, cast_ulp(enable_mask));
17254 + __set_bit(17, cast_ulp(enable_mask));
17255 + max_enable = 17+1;
17258 + pfm_core_pmu_info.flags |= PFM_X86_FL_USE_NMI;
17263 +static int pfm_core_pmc17_check(struct pfm_context *ctx,
17264 + struct pfm_event_set *set,
17265 + struct pfarg_pmc *req)
17267 + struct pfm_arch_context *ctx_arch;
17268 + ctx_arch = pfm_ctx_arch(ctx);
17271 + * if user activates PEBS_ENABLE, then we need to have a valid
17272 + * DS Area setup. This only happens when the PEBS sampling format is
17273 + * used in which case PFM_X86_USE_PEBS is set. We must reject all other
17276 + * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears
17277 + * that a value of 0 for this MSR does crash the system with
17280 + if (!ctx_arch->flags.use_pebs && req->reg_value) {
17281 + PFM_DBG("pmc17 useable only with a PEBS sampling format");
17288 + * detect if counters have overflowed.
17290 + * 0 : no overflow
17291 + * 1 : at least one overflow
17293 + * used by Intel Core-based processors
17295 +static int __kprobes pfm_core_has_ovfls(struct pfm_context *ctx)
17297 + struct pfm_arch_pmu_info *pmu_info;
17302 + pmu_info = &pfm_core_pmu_info;
17303 + cnt_mask = ctx->regs.cnt_pmds;
17304 + num = ctx->regs.num_counters;
17305 + wmask = 1ULL << pfm_pmu_conf->counter_width;
17307 + for (i = 0; num; i++) {
17308 + if (test_bit(i, cast_ulp(cnt_mask))) {
17309 + rdmsrl(pfm_core_pmd_desc[i].hw_addr, val);
17310 + if (!(val & wmask))
17318 +static int pfm_core_stop_save(struct pfm_context *ctx,
17319 + struct pfm_event_set *set)
17321 + struct pfm_arch_context *ctx_arch;
17322 + struct pfm_ds_area_intel_core *ds = NULL;
17323 + u64 used_mask[PFM_PMC_BV];
17325 + u64 val, wmask, ovfl_mask;
17326 + u16 count, has_ovfl;
17327 + u16 i, pebs_idx = ~0;
17329 + ctx_arch = pfm_ctx_arch(ctx);
17331 + wmask = 1ULL << pfm_pmu_conf->counter_width;
17334 + * used enable pmc bitmask
17336 + bitmap_and(cast_ulp(used_mask),
17337 + cast_ulp(set->used_pmcs),
17338 + cast_ulp(enable_mask),
17341 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
17343 + * stop monitoring
17344 + * Unfortunately, this is very expensive!
17345 + * wrmsrl() is serializing.
17347 + for (i = 0; count; i++) {
17348 + if (test_bit(i, cast_ulp(used_mask))) {
17349 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
17354 + * if we already have a pending overflow condition, we simply
17355 + * return to take care of it first.
17357 + if (set->npend_ovfls)
17360 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
17361 + cnt_mask = ctx->regs.cnt_pmds;
17363 + if (ctx_arch->flags.use_pebs) {
17364 + ds = ctx_arch->ds_area;
17365 + pebs_idx = 0; /* PMC0/PMD0 */
17366 + PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx",
17368 + (unsigned long long)ds->pebs_index,
17369 + (unsigned long long)ds->pebs_intr_thres);
17373 + * Check for pending overflows and save PMDs (combo)
17374 + * We employ used_pmds and not intr_pmds because we must
17375 + * also save the PMD registers.
17376 + * Must check for counting PMDs because of virtual PMDs
17378 + * XXX: should use the ovf_status register instead, yet
17379 + * we would have to check if NMI is used and fallback
17380 + * to individual pmd inspection.
17382 + count = set->nused_pmds;
17384 + for (i = 0; count; i++) {
17385 + if (test_bit(i, cast_ulp(set->used_pmds))) {
17386 + val = pfm_arch_read_pmd(ctx, i);
17387 + if (likely(test_bit(i, cast_ulp(cnt_mask)))) {
17388 + if (i == pebs_idx)
17389 + has_ovfl = (ds->pebs_index >=
17390 + ds->pebs_intr_thres);
17392 + has_ovfl = !(val & wmask);
17394 + __set_bit(i, cast_ulp(set->povfl_pmds));
17395 + set->npend_ovfls++;
17397 + val = (set->pmds[i].value & ~ovfl_mask)
17398 + | (val & ovfl_mask);
17400 + set->pmds[i].value = val;
17404 + /* 0 means: no need to save PMDs at upper level */
17409 + * pfm_core_quiesce - stop monitoring without grabbing any lock
17411 + * called from NMI interrupt handler to immediately stop monitoring
17412 + * cannot grab any lock, including perfmon related locks
17414 +static void __kprobes pfm_core_quiesce(void)
17417 + * quiesce PMU by clearing available registers that have
17418 + * the start/stop capability
17420 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
17421 + wrmsrl(MSR_P6_EVNTSEL0, 0);
17422 + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
17423 + wrmsrl(MSR_P6_EVNTSEL1, 0);
17424 + if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
17425 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
17426 + if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
17427 + wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
17430 + * pfm_core_restore_pmcs - reload PMC registers
17431 + * @ctx: context to restore from
17432 + * @set: current event set
17434 + * optimized version of pfm_arch_restore_pmcs(). On Core, we can
17435 + * afford to restore only the PMC registers we use, because they are
17436 + * all independent of each other.
17438 +static void pfm_core_restore_pmcs(struct pfm_context *ctx,
17439 + struct pfm_event_set *set)
17441 + struct pfm_arch_context *ctx_arch;
17445 + ctx_arch = pfm_ctx_arch(ctx);
17448 + * must restore DS pointer before restoring PMCs
17449 + * as this can potentially reactivate monitoring
17451 + if (ctx_arch->flags.use_ds)
17452 + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
17454 + mask = set->used_pmcs;
17455 + num = set->nused_pmcs;
17456 + for (i = 0; num; i++) {
17457 + if (test_bit(i, cast_ulp(mask))) {
17458 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]);
17465 + * Counters may have a model-specific width which can be probed using
17466 + * the CPUID.0xa leaf. Yet, the documentation says:
17467 + * "In the initial implementation, only the read bit width is reported
17468 + * by CPUID, write operations are limited to the low 32 bits."
17469 + * Bits [w-1:32] are sign extensions of bit 31. As such the effective width
17470 + * of a counter is 31 bits only.
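+ *
+ * This is what the overflow test in pfm_core_has_ovfls() and
+ * pfm_core_stop_save() relies on: with counter_width = 31,
+ * wmask = 1ULL << 31, and a counter whose bit 31 reads as 0 is
+ * considered to have wrapped past its effective width.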
17472 +static struct pfm_pmu_config pfm_core_pmu_conf = {
17473 + .pmu_name = "Intel Core",
17474 + .pmd_desc = pfm_core_pmd_desc,
17475 + .counter_width = 31,
17476 + .num_pmc_entries = PFM_CORE_NUM_PMCS,
17477 + .num_pmd_entries = PFM_CORE_NUM_PMDS,
17478 + .pmc_desc = pfm_core_pmc_desc,
17479 + .probe_pmu = pfm_core_probe_pmu,
17480 + .version = "1.2",
17481 + .flags = PFM_PMU_BUILTIN_FLAG,
17482 + .owner = THIS_MODULE,
17483 + .pmu_info = &pfm_core_pmu_info,
17484 + .pmc_write_check = pfm_core_pmc17_check
17487 +static int __init pfm_core_pmu_init_module(void)
17489 + return pfm_pmu_register(&pfm_core_pmu_conf);
17492 +static void __exit pfm_core_pmu_cleanup_module(void)
17494 + pfm_pmu_unregister(&pfm_core_pmu_conf);
17497 +module_init(pfm_core_pmu_init_module);
17498 +module_exit(pfm_core_pmu_cleanup_module);
17499 diff --git a/arch/x86/perfmon/perfmon_p4.c b/arch/x86/perfmon/perfmon_p4.c
17500 new file mode 100644
17501 index 0000000..1ffcf3c
17503 +++ b/arch/x86/perfmon/perfmon_p4.c
17506 + * This file contains the P4/Xeon PMU register description tables
17507 + * for both 32 and 64 bit modes.
17509 + * Copyright (c) 2005 Intel Corporation
17510 + * Contributed by Bryan Wilkerson <bryan.p.wilkerson@intel.com>
17512 + * This program is free software; you can redistribute it and/or
17513 + * modify it under the terms of version 2 of the GNU General Public
17514 + * License as published by the Free Software Foundation.
17516 + * This program is distributed in the hope that it will be useful,
17517 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
17518 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17519 + * General Public License for more details.
17521 + * You should have received a copy of the GNU General Public License
17522 + * along with this program; if not, write to the Free Software
17523 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17526 +#include <linux/module.h>
17527 +#include <linux/perfmon_kern.h>
17528 +#include <linux/kprobes.h>
17529 +#include <linux/nmi.h>
17530 +#include <asm/msr.h>
17531 +#include <asm/apic.h>
17533 +MODULE_AUTHOR("Bryan Wilkerson <bryan.p.wilkerson@intel.com>");
17534 +MODULE_DESCRIPTION("P4/Xeon/EM64T PMU description table");
17535 +MODULE_LICENSE("GPL");
17538 +MODULE_PARM_DESC(force, "bool: force module to load successfully");
17539 +module_param(force, bool, 0600);
17541 +static int force_nmi;
17542 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
17543 +module_param(force_nmi, bool, 0600);
17546 + * Extended register information, in addition to the address, used
17547 + * at runtime to figure out the mapping of reg addresses to logical procs
17548 + * and the association of registers with hardware-specific features
17550 +struct pfm_p4_regmap {
17552 + * one each for the logical CPUs. Index 0 corresponds to T0 and
17553 + * index 1 corresponds to T1. Index 1 can be zero if no T1
17554 + * complement reg exists.
17556 + unsigned long addrs[2]; /* 2 = number of threads */
17557 + unsigned int ctr; /* for CCCR/PERFEVTSEL, associated counter */
17558 + unsigned int reg_type;
17562 + * bitmask for pfm_p4_regmap.reg_type
17564 +#define PFM_REGT_NA 0x0000 /* not available */
17565 +#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */
17566 +#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */
17567 +#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */
17568 +#define PFM_REGT_PEBS 0x0010 /* PEBS related */
17569 +#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */
17570 +#define PFM_REGT_CTR 0x0040 /* counter */
17573 + * architecture specific context extension.
17574 + * located at: (struct pfm_arch_context *)(ctx+1)
17576 +struct pfm_arch_p4_context {
17577 + u32 npend_ovfls; /* P4 NMI #pending ovfls */
17579 + u64 povfl_pmds[PFM_PMD_BV]; /* P4 NMI overflowed counters */
17580 + u64 saved_cccrs[PFM_MAX_PMCS];
17584 + * ESCR reserved bitmask:
17585 + * - bits 31 - 63 reserved
17586 + * - T1_OS and T1_USR bits are reserved - set depending on logical proc
17587 + * user mode applications should use T0_OS and T0_USR to indicate the privilege levels to monitor
17588 + * RSVD: reserved bits must be 1
17590 +#define PFM_ESCR_RSVD ~0x000000007ffffffcULL
17593 + * CCCR default value:
17594 + * - OVF_PMI_T0=1 (bit 26)
17595 + * - OVF_PMI_T1=0 (bit 27) (set if necessary in pfm_write_reg())
17596 + * - ACTIVE_THREAD (bits 16-17) = 3; all other bits are zero
17598 + * OVF_PMI is forced to zero if PFM_REGFL_NO_EMUL64 is set on CCCR
17600 +#define PFM_CCCR_DFL ((1ULL<<26) | (3ULL<<16))
17603 + * CCCR reserved fields:
17604 + * - bits 0-11, 25-29, 31-63
17605 + * - OVF_PMI (26-27), override with REGFL_NO_EMUL64
17607 + * RSVD: reserved bits must be 1
17609 +#define PFM_CCCR_RSVD ~((0xfull<<12) \
17610 + | (0x7full<<18) \
17613 +#define PFM_P4_NO64 (3ULL<<26) /* use 3 even in non HT mode */
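+/*
+ * bits 26 and 27 of a CCCR are OVF_PMI_T0 and OVF_PMI_T1; clearing both
+ * through the no_emul64 mask disables the forced overflow interrupt for
+ * either logical processor when PFM_REGFL_NO_EMUL64 is used.
+ */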
17615 +#define PEBS_PMD 8 /* thread0: IQ_CTR4, thread1: IQ_CTR5 */
17618 + * With HyperThreading enabled:
17620 + * The ESCRs and CCCRs are divided in half with the top half
17621 + * belonging to logical processor 0 and the bottom half going to
17622 + * logical processor 1. Thus only half of the PMU resources are
17623 + * accessible to applications.
17625 + * PEBS is not available due to the fact that:
17626 + * - MSR_PEBS_MATRIX_VERT is shared between the threads
17627 + * - IA32_PEBS_ENABLE is shared between the threads
17629 + * With HyperThreading disabled:
17631 + * The full set of PMU resources is exposed to applications.
17633 + * The mapping is chosen such that PMCxx -> MSR is the same
17634 + * in HT and non-HT mode, provided the register is present in HT mode.
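+ *
+ * For example, pmc23 (BPU_CCCR0) is written to MSR_P4_BPU_CCCR0 when
+ * running on logical processor 0 and to MSR_P4_BPU_CCCR2 on logical
+ * processor 1, as encoded in pmc_addrs[] below.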
17637 +#define PFM_REGT_NHTESCR (PFM_REGT_ESCR|PFM_REGT_NOHT)
17638 +#define PFM_REGT_NHTCCCR (PFM_REGT_CCCR|PFM_REGT_NOHT|PFM_REGT_EN)
17639 +#define PFM_REGT_NHTPEBS (PFM_REGT_PEBS|PFM_REGT_NOHT|PFM_REGT_EN)
17640 +#define PFM_REGT_NHTCTR (PFM_REGT_CTR|PFM_REGT_NOHT)
17641 +#define PFM_REGT_ENAC (PFM_REGT_CCCR|PFM_REGT_EN)
17643 +static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value);
17644 +static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value);
17645 +static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum);
17646 +static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum);
17647 +static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags);
17648 +static void pfm_p4_free_context(struct pfm_context *ctx);
17649 +static int pfm_p4_has_ovfls(struct pfm_context *ctx);
17650 +static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set);
17651 +static void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
17652 +static void pfm_p4_nmi_copy_state(struct pfm_context *ctx);
17653 +static void __kprobes pfm_p4_quiesce(void);
17655 +static u64 enable_mask[PFM_MAX_PMCS];
17656 +static u16 max_enable;
17658 +static struct pfm_p4_regmap pmc_addrs[PFM_MAX_PMCS] = {
17659 + /*pmc 0 */ {{MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1}, 0, PFM_REGT_ESCR}, /* BPU_ESCR0,1 */
17660 + /*pmc 1 */ {{MSR_P4_IS_ESCR0, MSR_P4_IS_ESCR1}, 0, PFM_REGT_ESCR}, /* IS_ESCR0,1 */
17661 + /*pmc 2 */ {{MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1}, 0, PFM_REGT_ESCR}, /* MOB_ESCR0,1 */
17662 + /*pmc 3 */ {{MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1}, 0, PFM_REGT_ESCR}, /* ITLB_ESCR0,1 */
17663 + /*pmc 4 */ {{MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1}, 0, PFM_REGT_ESCR}, /* PMH_ESCR0,1 */
17664 + /*pmc 5 */ {{MSR_P4_IX_ESCR0, MSR_P4_IX_ESCR1}, 0, PFM_REGT_ESCR}, /* IX_ESCR0,1 */
17665 + /*pmc 6 */ {{MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1}, 0, PFM_REGT_ESCR}, /* FSB_ESCR0,1 */
17666 + /*pmc 7 */ {{MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1}, 0, PFM_REGT_ESCR}, /* BSU_ESCR0,1 */
17667 + /*pmc 8 */ {{MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1}, 0, PFM_REGT_ESCR}, /* MS_ESCR0,1 */
17668 + /*pmc 9 */ {{MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1}, 0, PFM_REGT_ESCR}, /* TC_ESCR0,1 */
17669 + /*pmc 10*/ {{MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1}, 0, PFM_REGT_ESCR}, /* TBPU_ESCR0,1 */
17670 + /*pmc 11*/ {{MSR_P4_FLAME_ESCR0, MSR_P4_FLAME_ESCR1}, 0, PFM_REGT_ESCR}, /* FLAME_ESCR0,1 */
17671 + /*pmc 12*/ {{MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1}, 0, PFM_REGT_ESCR}, /* FIRM_ESCR0,1 */
17672 + /*pmc 13*/ {{MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1}, 0, PFM_REGT_ESCR}, /* SAAT_ESCR0,1 */
17673 + /*pmc 14*/ {{MSR_P4_U2L_ESCR0, MSR_P4_U2L_ESCR1}, 0, PFM_REGT_ESCR}, /* U2L_ESCR0,1 */
17674 + /*pmc 15*/ {{MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1}, 0, PFM_REGT_ESCR}, /* DAC_ESCR0,1 */
17675 + /*pmc 16*/ {{MSR_P4_IQ_ESCR0, MSR_P4_IQ_ESCR1}, 0, PFM_REGT_ESCR}, /* IQ_ESCR0,1 (only model 1 and 2) */
17676 + /*pmc 17*/ {{MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1}, 0, PFM_REGT_ESCR}, /* ALF_ESCR0,1 */
17677 + /*pmc 18*/ {{MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1}, 0, PFM_REGT_ESCR}, /* RAT_ESCR0,1 */
17678 + /*pmc 19*/ {{MSR_P4_SSU_ESCR0, 0}, 0, PFM_REGT_ESCR}, /* SSU_ESCR0 */
17679 + /*pmc 20*/ {{MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1}, 0, PFM_REGT_ESCR}, /* CRU_ESCR0,1 */
17680 + /*pmc 21*/ {{MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3}, 0, PFM_REGT_ESCR}, /* CRU_ESCR2,3 */
17681 + /*pmc 22*/ {{MSR_P4_CRU_ESCR4, MSR_P4_CRU_ESCR5}, 0, PFM_REGT_ESCR}, /* CRU_ESCR4,5 */
17683 + /*pmc 23*/ {{MSR_P4_BPU_CCCR0, MSR_P4_BPU_CCCR2}, 0, PFM_REGT_ENAC}, /* BPU_CCCR0,2 */
17684 + /*pmc 24*/ {{MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3}, 1, PFM_REGT_ENAC}, /* BPU_CCCR1,3 */
17685 + /*pmc 25*/ {{MSR_P4_MS_CCCR0, MSR_P4_MS_CCCR2}, 2, PFM_REGT_ENAC}, /* MS_CCCR0,2 */
17686 + /*pmc 26*/ {{MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3}, 3, PFM_REGT_ENAC}, /* MS_CCCR1,3 */
17687 + /*pmc 27*/ {{MSR_P4_FLAME_CCCR0, MSR_P4_FLAME_CCCR2}, 4, PFM_REGT_ENAC}, /* FLAME_CCCR0,2 */
17688 + /*pmc 28*/ {{MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3}, 5, PFM_REGT_ENAC}, /* FLAME_CCCR1,3 */
17689 + /*pmc 29*/ {{MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR2}, 6, PFM_REGT_ENAC}, /* IQ_CCCR0,2 */
17690 + /*pmc 30*/ {{MSR_P4_IQ_CCCR1, MSR_P4_IQ_CCCR3}, 7, PFM_REGT_ENAC}, /* IQ_CCCR1,3 */
17691 + /*pmc 31*/ {{MSR_P4_IQ_CCCR4, MSR_P4_IQ_CCCR5}, 8, PFM_REGT_ENAC}, /* IQ_CCCR4,5 */
17692 + /* non HT extensions */
17693 + /*pmc 32*/ {{MSR_P4_BPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BPU_ESCR1 */
17694 + /*pmc 33*/ {{MSR_P4_IS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IS_ESCR1 */
17695 + /*pmc 34*/ {{MSR_P4_MOB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MOB_ESCR1 */
17696 + /*pmc 35*/ {{MSR_P4_ITLB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ITLB_ESCR1 */
17697 + /*pmc 36*/ {{MSR_P4_PMH_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* PMH_ESCR1 */
17698 + /*pmc 37*/ {{MSR_P4_IX_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IX_ESCR1 */
17699 + /*pmc 38*/ {{MSR_P4_FSB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FSB_ESCR1 */
17700 + /*pmc 39*/ {{MSR_P4_BSU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BSU_ESCR1 */
17701 + /*pmc 40*/ {{MSR_P4_MS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MS_ESCR1 */
17702 + /*pmc 41*/ {{MSR_P4_TC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TC_ESCR1 */
17703 + /*pmc 42*/ {{MSR_P4_TBPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TBPU_ESCR1 */
17704 + /*pmc 43*/ {{MSR_P4_FLAME_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FLAME_ESCR1 */
17705 + /*pmc 44*/ {{MSR_P4_FIRM_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FIRM_ESCR1 */
17706 + /*pmc 45*/ {{MSR_P4_SAAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* SAAT_ESCR1 */
17707 + /*pmc 46*/ {{MSR_P4_U2L_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* U2L_ESCR1 */
17708 + /*pmc 47*/ {{MSR_P4_DAC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* DAC_ESCR1 */
17709 + /*pmc 48*/ {{MSR_P4_IQ_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IQ_ESCR1 (only model 1 and 2) */
17710 + /*pmc 49*/ {{MSR_P4_ALF_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ALF_ESCR1 */
17711 + /*pmc 50*/ {{MSR_P4_RAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* RAT_ESCR1 */
17712 + /*pmc 51*/ {{MSR_P4_CRU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR1 */
17713 + /*pmc 52*/ {{MSR_P4_CRU_ESCR3, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR3 */
17714 + /*pmc 53*/ {{MSR_P4_CRU_ESCR5, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR5 */
17715 + /*pmc 54*/ {{MSR_P4_BPU_CCCR2, 0}, 9, PFM_REGT_NHTCCCR}, /* BPU_CCCR2 */
17716 + /*pmc 55*/ {{MSR_P4_BPU_CCCR3, 0}, 10, PFM_REGT_NHTCCCR}, /* BPU_CCCR3 */
17717 + /*pmc 56*/ {{MSR_P4_MS_CCCR2, 0}, 11, PFM_REGT_NHTCCCR}, /* MS_CCCR2 */
17718 + /*pmc 57*/ {{MSR_P4_MS_CCCR3, 0}, 12, PFM_REGT_NHTCCCR}, /* MS_CCCR3 */
17719 + /*pmc 58*/ {{MSR_P4_FLAME_CCCR2, 0}, 13, PFM_REGT_NHTCCCR}, /* FLAME_CCCR2 */
17720 + /*pmc 59*/ {{MSR_P4_FLAME_CCCR3, 0}, 14, PFM_REGT_NHTCCCR}, /* FLAME_CCCR3 */
17721 + /*pmc 60*/ {{MSR_P4_IQ_CCCR2, 0}, 15, PFM_REGT_NHTCCCR}, /* IQ_CCCR2 */
17722 + /*pmc 61*/ {{MSR_P4_IQ_CCCR3, 0}, 16, PFM_REGT_NHTCCCR}, /* IQ_CCCR3 */
17723 + /*pmc 62*/ {{MSR_P4_IQ_CCCR5, 0}, 17, PFM_REGT_NHTCCCR}, /* IQ_CCCR5 */
17724 + /*pmc 63*/ {{0x3f2, 0}, 0, PFM_REGT_NHTPEBS},/* PEBS_MATRIX_VERT */
17725 + /*pmc 64*/ {{0x3f1, 0}, 0, PFM_REGT_NHTPEBS} /* PEBS_ENABLE */
17728 +static struct pfm_p4_regmap pmd_addrs[PFM_MAX_PMDS] = {
17729 + /*pmd 0 */ {{MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_PERFCTR2}, 0, PFM_REGT_CTR}, /* BPU_CTR0,2 */
17730 + /*pmd 1 */ {{MSR_P4_BPU_PERFCTR1, MSR_P4_BPU_PERFCTR3}, 0, PFM_REGT_CTR}, /* BPU_CTR1,3 */
17731 + /*pmd 2 */ {{MSR_P4_MS_PERFCTR0, MSR_P4_MS_PERFCTR2}, 0, PFM_REGT_CTR}, /* MS_CTR0,2 */
17732 + /*pmd 3 */ {{MSR_P4_MS_PERFCTR1, MSR_P4_MS_PERFCTR3}, 0, PFM_REGT_CTR}, /* MS_CTR1,3 */
17733 + /*pmd 4 */ {{MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_PERFCTR2}, 0, PFM_REGT_CTR}, /* FLAME_CTR0,2 */
17734 + /*pmd 5 */ {{MSR_P4_FLAME_PERFCTR1, MSR_P4_FLAME_PERFCTR3}, 0, PFM_REGT_CTR}, /* FLAME_CTR1,3 */
17735 + /*pmd 6 */ {{MSR_P4_IQ_PERFCTR0, MSR_P4_IQ_PERFCTR2}, 0, PFM_REGT_CTR}, /* IQ_CTR0,2 */
17736 + /*pmd 7 */ {{MSR_P4_IQ_PERFCTR1, MSR_P4_IQ_PERFCTR3}, 0, PFM_REGT_CTR}, /* IQ_CTR1,3 */
17737 + /*pmd 8 */ {{MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_PERFCTR5}, 0, PFM_REGT_CTR}, /* IQ_CTR4,5 */
17739 + * non HT extensions
17741 + /*pmd 9 */ {{MSR_P4_BPU_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR2 */
17742 + /*pmd 10*/ {{MSR_P4_BPU_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR3 */
17743 + /*pmd 11*/ {{MSR_P4_MS_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR2 */
17744 + /*pmd 12*/ {{MSR_P4_MS_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR3 */
17745 + /*pmd 13*/ {{MSR_P4_FLAME_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR2 */
17746 + /*pmd 14*/ {{MSR_P4_FLAME_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR3 */
17747 + /*pmd 15*/ {{MSR_P4_IQ_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR2 */
17748 + /*pmd 16*/ {{MSR_P4_IQ_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR3 */
17749 + /*pmd 17*/ {{MSR_P4_IQ_PERFCTR5, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR5 */
17752 +static struct pfm_arch_pmu_info pfm_p4_pmu_info = {
17753 + .write_pmc = pfm_p4_write_pmc,
17754 + .write_pmd = pfm_p4_write_pmd,
17755 + .read_pmc = pfm_p4_read_pmc,
17756 + .read_pmd = pfm_p4_read_pmd,
17757 + .create_context = pfm_p4_create_context,
17758 + .free_context = pfm_p4_free_context,
17759 + .has_ovfls = pfm_p4_has_ovfls,
17760 + .stop_save = pfm_p4_stop_save,
17761 + .restore_pmcs = pfm_p4_restore_pmcs,
17762 + .nmi_copy_state = pfm_p4_nmi_copy_state,
17763 + .quiesce = pfm_p4_quiesce
17766 +static struct pfm_regmap_desc pfm_p4_pmc_desc[] = {
17767 +/* pmc0 */ PMC_D(PFM_REG_I, "BPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR0),
17768 +/* pmc1 */ PMC_D(PFM_REG_I, "IS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR0),
17769 +/* pmc2 */ PMC_D(PFM_REG_I, "MOB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR0),
17770 +/* pmc3 */ PMC_D(PFM_REG_I, "ITLB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR0),
17771 +/* pmc4 */ PMC_D(PFM_REG_I, "PMH_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR0),
17772 +/* pmc5 */ PMC_D(PFM_REG_I, "IX_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR0),
17773 +/* pmc6 */ PMC_D(PFM_REG_I, "FSB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR0),
17774 +/* pmc7 */ PMC_D(PFM_REG_I, "BSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR0),
17775 +/* pmc8 */ PMC_D(PFM_REG_I, "MS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR0),
17776 +/* pmc9 */ PMC_D(PFM_REG_I, "TC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR0),
17777 +/* pmc10 */ PMC_D(PFM_REG_I, "TBPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR0),
17778 +/* pmc11 */ PMC_D(PFM_REG_I, "FLAME_ESCR0", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR0),
17779 +/* pmc12 */ PMC_D(PFM_REG_I, "FIRM_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR0),
17780 +/* pmc13 */ PMC_D(PFM_REG_I, "SAAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR0),
17781 +/* pmc14 */ PMC_D(PFM_REG_I, "U2L_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR0),
17782 +/* pmc15 */ PMC_D(PFM_REG_I, "DAC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR0),
17783 +/* pmc16 */ PMC_D(PFM_REG_I, "IQ_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), /* only model 1 and 2*/
17784 +/* pmc17 */ PMC_D(PFM_REG_I, "ALF_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR0),
17785 +/* pmc18 */ PMC_D(PFM_REG_I, "RAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR0),
17786 +/* pmc19 */ PMC_D(PFM_REG_I, "SSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SSU_ESCR0),
17787 +/* pmc20 */ PMC_D(PFM_REG_I, "CRU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR0),
17788 +/* pmc21 */ PMC_D(PFM_REG_I, "CRU_ESCR2" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR2),
17789 +/* pmc22 */ PMC_D(PFM_REG_I, "CRU_ESCR4" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR4),
17790 +/* pmc23 */ PMC_D(PFM_REG_I64, "BPU_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR0),
17791 +/* pmc24 */ PMC_D(PFM_REG_I64, "BPU_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR1),
17792 +/* pmc25 */ PMC_D(PFM_REG_I64, "MS_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR0),
17793 +/* pmc26 */ PMC_D(PFM_REG_I64, "MS_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR1),
17794 +/* pmc27 */ PMC_D(PFM_REG_I64, "FLAME_CCCR0", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR0),
17795 +/* pmc28 */ PMC_D(PFM_REG_I64, "FLAME_CCCR1", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR1),
17796 +/* pmc29 */ PMC_D(PFM_REG_I64, "IQ_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR0),
17797 +/* pmc30 */ PMC_D(PFM_REG_I64, "IQ_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR1),
17798 +/* pmc31 */ PMC_D(PFM_REG_I64, "IQ_CCCR4" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR4),
17799 + /* No HT extension */
17800 +/* pmc32 */ PMC_D(PFM_REG_I, "BPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR1),
17801 +/* pmc33 */ PMC_D(PFM_REG_I, "IS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR1),
17802 +/* pmc34 */ PMC_D(PFM_REG_I, "MOB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR1),
17803 +/* pmc35 */ PMC_D(PFM_REG_I, "ITLB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR1),
17804 +/* pmc36 */ PMC_D(PFM_REG_I, "PMH_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR1),
17805 +/* pmc37 */ PMC_D(PFM_REG_I, "IX_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR1),
17806 +/* pmc38 */ PMC_D(PFM_REG_I, "FSB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR1),
17807 +/* pmc39 */ PMC_D(PFM_REG_I, "BSU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR1),
17808 +/* pmc40 */ PMC_D(PFM_REG_I, "MS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR1),
17809 +/* pmc41 */ PMC_D(PFM_REG_I, "TC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR1),
17810 +/* pmc42 */ PMC_D(PFM_REG_I, "TBPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR1),
17811 +/* pmc43 */ PMC_D(PFM_REG_I, "FLAME_ESCR1", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR1),
17812 +/* pmc44 */ PMC_D(PFM_REG_I, "FIRM_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR1),
17813 +/* pmc45 */ PMC_D(PFM_REG_I, "SAAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR1),
17814 +/* pmc46 */ PMC_D(PFM_REG_I, "U2L_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR1),
17815 +/* pmc47 */ PMC_D(PFM_REG_I, "DAC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR1),
17816 +/* pmc48 */ PMC_D(PFM_REG_I, "IQ_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR1), /* only model 1 and 2 */
17817 +/* pmc49 */ PMC_D(PFM_REG_I, "ALF_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR1),
17818 +/* pmc50 */ PMC_D(PFM_REG_I, "RAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR1),
17819 +/* pmc51 */ PMC_D(PFM_REG_I, "CRU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR1),
17820 +/* pmc52 */ PMC_D(PFM_REG_I, "CRU_ESCR3" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR3),
17821 +/* pmc53 */ PMC_D(PFM_REG_I, "CRU_ESCR5" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR5),
17822 +/* pmc54 */ PMC_D(PFM_REG_I64, "BPU_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR2),
17823 +/* pmc55 */ PMC_D(PFM_REG_I64, "BPU_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR3),
17824 +/* pmc56 */ PMC_D(PFM_REG_I64, "MS_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR2),
17825 +/* pmc57 */ PMC_D(PFM_REG_I64, "MS_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR3),
17826 +/* pmc58 */ PMC_D(PFM_REG_I64, "FLAME_CCCR2", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR2),
17827 +/* pmc59 */ PMC_D(PFM_REG_I64, "FLAME_CCCR3", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR3),
17828 +/* pmc60 */ PMC_D(PFM_REG_I64, "IQ_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR2),
17829 +/* pmc61 */ PMC_D(PFM_REG_I64, "IQ_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR3),
17830 +/* pmc62 */ PMC_D(PFM_REG_I64, "IQ_CCCR5" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR5),
17831 +/* pmc63 */ PMC_D(PFM_REG_I, "PEBS_MATRIX_VERT", 0, 0xffffffffffffffecULL, 0, 0x3f2),
17832 +/* pmc64 */ PMC_D(PFM_REG_I, "PEBS_ENABLE", 0, 0xfffffffff8ffe000ULL, 0, 0x3f1)
17834 +#define PFM_P4_NUM_PMCS ARRAY_SIZE(pfm_p4_pmc_desc)
17837 + * See section 15.10.6.6 for details about the IQ block
17839 +static struct pfm_regmap_desc pfm_p4_pmd_desc[] = {
17840 +/* pmd0 */ PMD_D(PFM_REG_C, "BPU_CTR0", MSR_P4_BPU_PERFCTR0),
17841 +/* pmd1 */ PMD_D(PFM_REG_C, "BPU_CTR1", MSR_P4_BPU_PERFCTR1),
17842 +/* pmd2 */ PMD_D(PFM_REG_C, "MS_CTR0", MSR_P4_MS_PERFCTR0),
17843 +/* pmd3 */ PMD_D(PFM_REG_C, "MS_CTR1", MSR_P4_MS_PERFCTR1),
17844 +/* pmd4 */ PMD_D(PFM_REG_C, "FLAME_CTR0", MSR_P4_FLAME_PERFCTR0),
17845 +/* pmd5 */ PMD_D(PFM_REG_C, "FLAME_CTR1", MSR_P4_FLAME_PERFCTR1),
17846 +/* pmd6 */ PMD_D(PFM_REG_C, "IQ_CTR0", MSR_P4_IQ_PERFCTR0),
17847 +/* pmd7 */ PMD_D(PFM_REG_C, "IQ_CTR1", MSR_P4_IQ_PERFCTR1),
17848 +/* pmd8 */ PMD_D(PFM_REG_C, "IQ_CTR4", MSR_P4_IQ_PERFCTR4),
17849 + /* no HT extension */
17850 +/* pmd9 */ PMD_D(PFM_REG_C, "BPU_CTR2", MSR_P4_BPU_PERFCTR2),
17851 +/* pmd10 */ PMD_D(PFM_REG_C, "BPU_CTR3", MSR_P4_BPU_PERFCTR3),
17852 +/* pmd11 */ PMD_D(PFM_REG_C, "MS_CTR2", MSR_P4_MS_PERFCTR2),
17853 +/* pmd12 */ PMD_D(PFM_REG_C, "MS_CTR3", MSR_P4_MS_PERFCTR3),
17854 +/* pmd13 */ PMD_D(PFM_REG_C, "FLAME_CTR2", MSR_P4_FLAME_PERFCTR2),
17855 +/* pmd14 */ PMD_D(PFM_REG_C, "FLAME_CTR3", MSR_P4_FLAME_PERFCTR3),
17856 +/* pmd15 */ PMD_D(PFM_REG_C, "IQ_CTR2", MSR_P4_IQ_PERFCTR2),
17857 +/* pmd16 */ PMD_D(PFM_REG_C, "IQ_CTR3", MSR_P4_IQ_PERFCTR3),
17858 +/* pmd17 */ PMD_D(PFM_REG_C, "IQ_CTR5", MSR_P4_IQ_PERFCTR5)
17860 +#define PFM_P4_NUM_PMDS ARRAY_SIZE(pfm_p4_pmd_desc)
17863 + * Due to hotplug CPU support, threads may not necessarily
17864 + * be activated at the time the module is inserted. We need
17865 + * to check whether they could be activated by looking at
17866 + * the present CPU (present != online).
17868 +static int pfm_p4_probe_pmu(void)
17874 + * only works on Intel processors
17876 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
17877 + PFM_INFO("not running on Intel processor");
17881 + if (current_cpu_data.x86 != 15) {
17882 + PFM_INFO("unsupported family=%d", current_cpu_data.x86);
17886 + switch (current_cpu_data.x86_model) {
17891 + * IQ_ESCR0, IQ_ESCR1 only present on model 1, 2
17893 + pfm_p4_pmc_desc[16].type = PFM_REG_NA;
17894 + pfm_p4_pmc_desc[48].type = PFM_REG_NA;
17898 + * do not know if they all work the same, so reject
17902 + PFM_INFO("unsupported model %d",
17903 + current_cpu_data.x86_model);
17909 + * check for local APIC (required)
17911 + if (!cpu_has_apic) {
17912 + PFM_INFO("no local APIC, unsupported");
17916 + ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map))
17917 + / current_cpu_data.x86_max_cores) > 1;
17921 + if (cpu_has_ht) {
17923 + PFM_INFO("HyperThreading supported, status %s",
17924 + ht_enabled ? "on": "off");
17926 + * disable registers not supporting HT
17928 + if (ht_enabled) {
17929 + PFM_INFO("disabling half the registers for HT");
17930 + for (i = 0; i < PFM_P4_NUM_PMCS; i++) {
17931 + if (pmc_addrs[(i)].reg_type & PFM_REGT_NOHT)
17932 + pfm_p4_pmc_desc[i].type = PFM_REG_NA;
17934 + for (i = 0; i < PFM_P4_NUM_PMDS; i++) {
17935 + if (pmd_addrs[(i)].reg_type & PFM_REGT_NOHT)
17936 + pfm_p4_pmd_desc[i].type = PFM_REG_NA;
17941 + if (cpu_has_ds) {
17942 + PFM_INFO("Data Save Area (DS) supported");
17944 + if (cpu_has_pebs) {
17946 + * PEBS does not work with HyperThreading enabled
17949 + PFM_INFO("PEBS supported, status off (because of HT)");
17951 + PFM_INFO("PEBS supported, status on");
17956 + * build enable mask
17958 + for (i = 0; i < PFM_P4_NUM_PMCS; i++) {
17959 + if (pmc_addrs[(i)].reg_type & PFM_REGT_EN) {
17960 + __set_bit(i, cast_ulp(enable_mask));
17961 + max_enable = i + 1;
17966 + pfm_p4_pmu_info.flags |= PFM_X86_FL_USE_NMI;
17969 +static inline int get_smt_id(void)
17972 + int cpu = smp_processor_id();
17973 + return (cpu != first_cpu(__get_cpu_var(cpu_sibling_map)));
17979 +static void __pfm_write_reg_p4(const struct pfm_p4_regmap *xreg, u64 val)
17984 + smt_id = get_smt_id();
17986 + * HT is only supported by P4-style PMU
17988 + * Adjust for T1 if necessary:
17990 + * - move the T0_OS/T0_USR bits into T1 slots
17991 + * - move the OVF_PMI_T0 bits into T1 slot
17993 + * The P4/EM64T T1 bits are cleared by the description table.
17994 + * Users only program the T0 bits.
17997 + if (xreg->reg_type & PFM_REGT_ESCR) {
17999 + /* copy T0_USR & T0_OS to T1 */
18000 + val |= ((val & 0xc) >> 2);
18002 + /* clear bits T0_USR & T0_OS */
18005 + } else if (xreg->reg_type & PFM_REGT_CCCR) {
18006 + pmi = (val >> 26) & 0x1;
18008 + val &= ~(1UL<<26);
18013 + if (xreg->addrs[smt_id])
18014 + wrmsrl(xreg->addrs[smt_id], val);
18017 +void __pfm_read_reg_p4(const struct pfm_p4_regmap *xreg, u64 *val)
18021 + smt_id = get_smt_id();
18023 + if (likely(xreg->addrs[smt_id])) {
18024 + rdmsrl(xreg->addrs[smt_id], *val);
18026 + * HT is only supported by P4-style PMU
18028 + * move the Tx_OS and Tx_USR bits into
18029 + * T0 slots setting the T1 slots to zero
18031 + if (xreg->reg_type & PFM_REGT_ESCR) {
18033 + *val |= (((*val) & 0x3) << 2);
18036 + * zero out bits that are reserved
18037 + * (including T1_OS and T1_USR)
18039 + *val &= PFM_ESCR_RSVD;
18045 +static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value)
18047 + __pfm_write_reg_p4(&pmc_addrs[cnum], value);
18050 +static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value)
18052 + __pfm_write_reg_p4(&pmd_addrs[cnum], value);
18055 +static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum)
18058 + __pfm_read_reg_p4(&pmd_addrs[cnum], &tmp);
18062 +static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum)
18065 + __pfm_read_reg_p4(&pmc_addrs[cnum], &tmp);
18069 +struct pfm_ds_area_p4 {
18070 + unsigned long bts_buf_base;
18071 + unsigned long bts_index;
18072 + unsigned long bts_abs_max;
18073 + unsigned long bts_intr_thres;
18074 + unsigned long pebs_buf_base;
18075 + unsigned long pebs_index;
18076 + unsigned long pebs_abs_max;
18077 + unsigned long pebs_intr_thres;
18078 + u64 pebs_cnt_reset;
18082 +static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
18084 + struct pfm_arch_pmu_info *pmu_info;
18085 + struct pfm_arch_context *ctx_arch;
18086 + struct pfm_ds_area_p4 *ds = NULL;
18087 + u64 used_mask[PFM_PMC_BV];
18088 + u16 i, j, count, pebs_idx = ~0;
18090 + u64 cccr, ctr1, ctr2, ovfl_mask;
18092 + pmu_info = &pfm_p4_pmu_info;
18093 + ctx_arch = pfm_ctx_arch(ctx);
18094 + max_pmc = ctx->regs.max_pmc;
18095 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
18098 + * build used enable PMC bitmask
18099 + * if user did not set any CCCR, then mask is
18100 + * empty and there is nothing to do because nothing
18103 + bitmap_and(cast_ulp(used_mask),
18104 + cast_ulp(set->used_pmcs),
18105 + cast_ulp(enable_mask),
18108 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
18110 + PFM_DBG_ovfl("npend=%u ena_mask=0x%llx u_pmcs=0x%llx count=%u num=%u",
18111 + set->npend_ovfls,
18112 + (unsigned long long)enable_mask[0],
18113 + (unsigned long long)set->used_pmcs[0],
18114 + count, max_enable);
18117 + * ensures we do not destroy pending overflow
18118 + * information. If pending interrupts are already
18119 + * known, then we just stop monitoring.
18121 + if (set->npend_ovfls) {
18123 + * clear enable bit
18124 + * unfortunately, this is very expensive!
18126 + for (i = 0; count; i++) {
18127 + if (test_bit(i, cast_ulp(used_mask))) {
18128 + __pfm_write_reg_p4(pmc_addrs+i, 0);
18132 + /* need to save PMDs at upper level */
18136 + if (ctx_arch->flags.use_pebs) {
18137 + ds = ctx_arch->ds_area;
18138 + pebs_idx = PEBS_PMD;
18139 + PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx",
18141 + (unsigned long long)ds->pebs_index,
18142 + (unsigned long long)ds->pebs_intr_thres);
18146 + * stop monitoring AND collect pending overflow information AND
18149 + * We need to access the CCCR twice, once to get overflow info
18150 + * and a second time to stop monitoring (which destroys the OVF flag).
18151 + * Similarly, we need to read the counter twice to check whether
18152 + * it overflowed between the CCCR read and the CCCR write.
18154 + for (i = 0; count; i++) {
18155 + if (i != pebs_idx && test_bit(i, cast_ulp(used_mask))) {
18157 + * controlled counter
18159 + j = pmc_addrs[i].ctr;
18161 + /* read CCCR (PMC) value */
18162 + __pfm_read_reg_p4(pmc_addrs+i, &cccr);
18164 + /* read counter (PMD) controlled by PMC */
18165 + __pfm_read_reg_p4(pmd_addrs+j, &ctr1);
18167 + /* clear CCCR value: stop counter but destroy OVF */
18168 + __pfm_write_reg_p4(pmc_addrs+i, 0);
18170 + /* read counter controlled by CCCR again */
18171 + __pfm_read_reg_p4(pmd_addrs+j, &ctr2);
18174 + * there is an overflow if either:
18175 + * - CCCR.ovf is set (and we just cleared it)
18177 + * in that case we set the bit corresponding to the
18178 + * overflowed PMD in povfl_pmds.
18180 + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
18181 + __set_bit(j, cast_ulp(set->povfl_pmds));
18182 + set->npend_ovfls++;
18184 + ctr2 = (set->pmds[j].value & ~ovfl_mask) | (ctr2 & ovfl_mask);
18185 + set->pmds[j].value = ctr2;
18190 + * check for PEBS buffer full and set the corresponding PMD overflow
18192 + if (ctx_arch->flags.use_pebs) {
18193 + PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", ds, ds->pebs_index, ds->pebs_intr_thres);
18194 + if (ds->pebs_index >= ds->pebs_intr_thres
18195 + && test_bit(PEBS_PMD, cast_ulp(set->used_pmds))) {
18196 + __set_bit(PEBS_PMD, cast_ulp(set->povfl_pmds));
18197 + set->npend_ovfls++;
18200 + /* 0 means: no need to save the PMD at higher level */
18204 +static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags)
18206 + struct pfm_arch_context *ctx_arch;
18208 + ctx_arch = pfm_ctx_arch(ctx);
18210 + ctx_arch->data = kzalloc(sizeof(struct pfm_arch_p4_context), GFP_KERNEL);
18211 + if (!ctx_arch->data)
18217 +static void pfm_p4_free_context(struct pfm_context *ctx)
18219 + struct pfm_arch_context *ctx_arch;
18221 + ctx_arch = pfm_ctx_arch(ctx);
18223 + * we do not check whether data was ever allocated; it would
18224 + * be NULL and kfree() can deal with NULL
18226 + kfree(ctx_arch->data);
18230 + * detect if counters have overflowed.
18232 + * 0 : no overflow
18233 + * 1 : at least one overflow
18235 + * used by Intel P4
18237 +static int __kprobes pfm_p4_has_ovfls(struct pfm_context *ctx)
18239 + struct pfm_arch_pmu_info *pmu_info;
18240 + struct pfm_p4_regmap *xrc, *xrd;
18241 + struct pfm_arch_context *ctx_arch;
18242 + struct pfm_arch_p4_context *p4;
18243 + u64 ena_mask[PFM_PMC_BV];
18244 + u64 cccr, ctr1, ctr2;
18247 + pmu_info = &pfm_p4_pmu_info;
18249 + ctx_arch = pfm_ctx_arch(ctx);
18252 + p4 = ctx_arch->data;
18254 + bitmap_and(cast_ulp(ena_mask),
18255 + cast_ulp(ctx->regs.pmcs),
18256 + cast_ulp(enable_mask),
18259 + n = bitmap_weight(cast_ulp(ena_mask), max_enable);
18261 + for (i = 0; n; i++) {
18262 + if (!test_bit(i, cast_ulp(ena_mask)))
18265 + * controlled counter
18269 + /* read CCCR (PMC) value */
18270 + __pfm_read_reg_p4(xrc+i, &cccr);
18272 + /* read counter (PMD) controlled by PMC */
18273 + __pfm_read_reg_p4(xrd+j, &ctr1);
18275 + /* clear CCCR value: stop counter but destroy OVF */
18276 + __pfm_write_reg_p4(xrc+i, 0);
18278 + /* read counter controlled by CCCR again */
18279 + __pfm_read_reg_p4(xrd+j, &ctr2);
18282 + * there is an overflow if either:
18283 + * - CCCR.ovf is set (and we just cleared it)
18285 + * in that case we set the bit corresponding to the
18286 + * overflowed PMD in povfl_pmds.
18288 + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
18289 + __set_bit(j, cast_ulp(p4->povfl_pmds));
18290 + p4->npend_ovfls++;
18292 + p4->saved_cccrs[i] = cccr;
18296 + * if there was no overflow, then it means the NMI was not really
18297 + * for us, so we have to resume monitoring
18299 + if (unlikely(!p4->npend_ovfls)) {
18300 + for (i = 0; n; i++) {
18301 + if (!test_bit(i, cast_ulp(ena_mask)))
18303 + __pfm_write_reg_p4(xrc+i, p4->saved_cccrs[i]);
18309 +void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
18311 + struct pfm_arch_pmu_info *pmu_info;
18312 + struct pfm_arch_context *ctx_arch;
18316 + ctx_arch = pfm_ctx_arch(ctx);
18317 + pmu_info = pfm_pmu_info();
18320 + * must restore DS pointer before restoring PMCs
18321 + * as this can potentially reactivate monitoring
18323 + if (ctx_arch->flags.use_ds)
18324 + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
18327 + * must restore everything because there are some dependencies
18328 + * (e.g., ESCR and CCCR)
18330 + num = ctx->regs.num_pmcs;
18331 + mask = ctx->regs.pmcs;
18332 + for (i = 0; num; i++) {
18333 + if (test_bit(i, cast_ulp(mask))) {
18334 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
18341 + * invoked only when NMI is used. Called from the LOCAL_PERFMON_VECTOR
18342 + * handler to copy P4 overflow state captured when the NMI triggered.
18343 + * Given that on P4, stopping monitoring destroys the overflow information,
18344 + * we save it in pfm_p4_has_ovfls() where monitoring is also stopped.
18346 + * Here we propagate the overflow state to the current active set. The
18347 + * freeze_pmu() call will not overwrite this state because npend_ovfls
18350 +static void pfm_p4_nmi_copy_state(struct pfm_context *ctx)
18352 + struct pfm_arch_context *ctx_arch;
18353 + struct pfm_event_set *set;
18354 + struct pfm_arch_p4_context *p4;
18356 + ctx_arch = pfm_ctx_arch(ctx);
18357 + p4 = ctx_arch->data;
18358 + set = ctx->active_set;
18360 + if (p4->npend_ovfls) {
18361 + set->npend_ovfls = p4->npend_ovfls;
18363 + bitmap_copy(cast_ulp(set->povfl_pmds),
18364 + cast_ulp(p4->povfl_pmds),
18365 + ctx->regs.max_pmd);
18367 + p4->npend_ovfls = 0;
18372 + * pfm_p4_quiesce - stop monitoring without grabbing any lock
18374 + * called from NMI interrupt handler to immediately stop monitoring
18375 + * cannot grab any lock, including perfmon related locks
18377 +static void __kprobes pfm_p4_quiesce(void)
18381 + * quiesce PMU by clearing available registers that have
18382 + * the start/stop capability
18384 + for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) {
18385 + if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))
18386 + && test_bit(i, cast_ulp(enable_mask)))
18387 + __pfm_write_reg_p4(pmc_addrs+i, 0);
18392 +static struct pfm_pmu_config pfm_p4_pmu_conf = {
18393 + .pmu_name = "Intel P4",
18394 + .counter_width = 40,
18395 + .pmd_desc = pfm_p4_pmd_desc,
18396 + .pmc_desc = pfm_p4_pmc_desc,
18397 + .num_pmc_entries = PFM_P4_NUM_PMCS,
18398 + .num_pmd_entries = PFM_P4_NUM_PMDS,
18399 + .probe_pmu = pfm_p4_probe_pmu,
18400 + .version = "1.0",
18401 + .flags = PFM_PMU_BUILTIN_FLAG,
18402 + .owner = THIS_MODULE,
18403 + .pmu_info = &pfm_p4_pmu_info
18406 +static int __init pfm_p4_pmu_init_module(void)
18408 + return pfm_pmu_register(&pfm_p4_pmu_conf);
18411 +static void __exit pfm_p4_pmu_cleanup_module(void)
18413 + pfm_pmu_unregister(&pfm_p4_pmu_conf);
18416 +module_init(pfm_p4_pmu_init_module);
18417 +module_exit(pfm_p4_pmu_cleanup_module);
18418 diff --git a/arch/x86/perfmon/perfmon_p6.c b/arch/x86/perfmon/perfmon_p6.c
18419 new file mode 100644
18420 index 0000000..47c0a46
18422 +++ b/arch/x86/perfmon/perfmon_p6.c
18425 + * This file contains the P6 family processor PMU register description tables
18427 + * This module supports original P6 processors
18428 + * (Pentium II, Pentium Pro, Pentium III) and Pentium M.
18430 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
18431 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
18433 + * This program is free software; you can redistribute it and/or
18434 + * modify it under the terms of version 2 of the GNU General Public
18435 + * License as published by the Free Software Foundation.
18437 + * This program is distributed in the hope that it will be useful,
18438 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18439 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18440 + * General Public License for more details.
18442 + * You should have received a copy of the GNU General Public License
18443 + * along with this program; if not, write to the Free Software
18444 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18447 +#include <linux/module.h>
18448 +#include <linux/kprobes.h>
18449 +#include <linux/perfmon_kern.h>
18450 +#include <linux/nmi.h>
18451 +#include <asm/msr.h>
18453 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
18454 +MODULE_DESCRIPTION("P6 PMU description table");
18455 +MODULE_LICENSE("GPL");
18457 +static int force_nmi;
18458 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
18459 +module_param(force_nmi, bool, 0600);
18462 + * - upper 32 bits are reserved
18463 + * - INT: APIC enable bit is reserved (forced to 1)
18464 + * - bit 21 is reserved
18465 + * - bit 22 is reserved on PERFEVTSEL1
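+ *   (bit 22 of PERFEVTSEL0 is the EN bit which globally starts and stops
+ *   both counters on P6, which is why it is writable only there)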
18467 + * RSVD: reserved bits are 1
18469 +#define PFM_P6_PMC0_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21))
18470 +#define PFM_P6_PMC1_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (3ULL<<21))
18473 + * force Local APIC interrupt on overflow
18474 + * disable with NO_EMUL64
18476 +#define PFM_P6_PMC_VAL (1ULL<<20)
18477 +#define PFM_P6_NO64 (1ULL<<20)
18480 +static void __kprobes pfm_p6_quiesce(void);
18481 +static int pfm_p6_has_ovfls(struct pfm_context *ctx);
18482 +static int pfm_p6_stop_save(struct pfm_context *ctx,
18483 + struct pfm_event_set *set);
18485 +static u64 enable_mask[PFM_MAX_PMCS];
18486 +static u16 max_enable;
18489 + * PFM_X86_FL_NO_SHARING: because of the single enable bit on MSR_P6_EVNTSEL0
18490 + * the PMU cannot be shared with NMI watchdog or Oprofile
18492 +struct pfm_arch_pmu_info pfm_p6_pmu_info = {
18493 + .stop_save = pfm_p6_stop_save,
18494 + .has_ovfls = pfm_p6_has_ovfls,
18495 + .quiesce = pfm_p6_quiesce,
18496 + .flags = PFM_X86_FL_NO_SHARING,
18499 +static struct pfm_regmap_desc pfm_p6_pmc_desc[] = {
18500 +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFEVTSEL0", PFM_P6_PMC_VAL, PFM_P6_PMC0_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL0),
18501 +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFEVTSEL1", PFM_P6_PMC_VAL, PFM_P6_PMC1_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL1)
18503 +#define PFM_P6_NUM_PMCS ARRAY_SIZE(pfm_p6_pmc_desc)
18505 +#define PFM_P6_D(n) \
18506 + { .type = PFM_REG_C, \
18507 + .desc = "PERFCTR"#n, \
18508 + .hw_addr = MSR_P6_PERFCTR0+n, \
18510 + .dep_pmcs[0] = 1ULL << n \
18513 +static struct pfm_regmap_desc pfm_p6_pmd_desc[] = {
18514 +/* pmd0 */ PFM_P6_D(0),
18515 +/* pmd1 */ PFM_P6_D(1)
18517 +#define PFM_P6_NUM_PMDS ARRAY_SIZE(pfm_p6_pmd_desc)
18519 +static int pfm_p6_probe_pmu(void)
18523 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
18524 + PFM_INFO("not an Intel processor");
18529 + * check for P6 processor family
18531 + if (current_cpu_data.x86 != 6) {
18532 + PFM_INFO("unsupported family=%d", current_cpu_data.x86);
18536 + switch (current_cpu_data.x86_model) {
18537 + case 1: /* Pentium Pro */
18539 + case 5: /* Pentium II Deschutes */
18543 + /* for Pentium M, we need to check if PMU exist */
18544 + rdmsr(MSR_IA32_MISC_ENABLE, low, high);
18545 + if (low & (1U << 7))
18548 + PFM_INFO("unsupported CPU model %d",
18549 + current_cpu_data.x86_model);
18554 + if (!cpu_has_apic) {
18555 + PFM_INFO("no Local APIC, try rebooting with lapic");
18558 + __set_bit(0, cast_ulp(enable_mask));
18559 + __set_bit(1, cast_ulp(enable_mask));
18560 + max_enable = 1 + 1;
18562 + * force NMI interrupt?
18565 + pfm_p6_pmu_info.flags |= PFM_X86_FL_USE_NMI;
18571 + * pfm_p6_has_ovfls - check for pending overflow condition
18572 + * @ctx: context to work on
18574 + * detect if counters have overflowed.
18576 + * 0 : no overflow
18577 + * 1 : at least one overflow
18579 +static int __kprobes pfm_p6_has_ovfls(struct pfm_context *ctx)
18585 + cnt_mask = ctx->regs.cnt_pmds;
18586 + num = ctx->regs.num_counters;
18587 + wmask = 1ULL << pfm_pmu_conf->counter_width;
18590 + * we can leverage the fact that we know the mapping
18591 + * to hardcode the MSR address and avoid accessing
18592 + * more cachelines
18594 + * We need to check cnt_mask because not all registers
18595 + * may be available.
18597 + for (i = 0; num; i++) {
18598 + if (test_bit(i, cast_ulp(cnt_mask))) {
18599 + rdmsrl(MSR_P6_PERFCTR0+i, val);
18600 + if (!(val & wmask))
18609 + * pfm_p6_stop_save -- stop monitoring and save PMD values
18610 + * @ctx: context to work on
18611 + * @set: current event set
18614 + * 0 - no need to save PMDs in caller
18615 + * 1 - need to save PMDs in caller
18617 +static int pfm_p6_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
18619 + struct pfm_arch_pmu_info *pmu_info;
18620 + u64 used_mask[PFM_PMC_BV];
18622 + u64 val, wmask, ovfl_mask;
18625 + pmu_info = pfm_pmu_info();
18627 + wmask = 1ULL << pfm_pmu_conf->counter_width;
18628 + bitmap_and(cast_ulp(used_mask),
18629 + cast_ulp(set->used_pmcs),
18630 + cast_ulp(enable_mask),
18633 + count = bitmap_weight(cast_ulp(used_mask), ctx->regs.max_pmc);
18636 + * stop monitoring
18637 + * Unfortunately, this is very expensive!
18638 + * wrmsrl() is serializing.
18640 + for (i = 0; count; i++) {
18641 + if (test_bit(i, cast_ulp(used_mask))) {
18642 + wrmsrl(MSR_P6_EVNTSEL0+i, 0);
18648 + * if we already have a pending overflow condition, we simply
18649 + * return to take care of it first.
18651 + if (set->npend_ovfls)
18654 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
18655 + cnt_pmds = ctx->regs.cnt_pmds;
18658 + * check for pending overflows and save PMDs (combo)
18659 + * we employ used_pmds because we also need to save the PMD values
18660 + * and not just check for pending interrupts.
18662 + * Must check for counting PMDs because of virtual PMDs
18664 + count = set->nused_pmds;
18665 + for (i = 0; count; i++) {
18666 + if (test_bit(i, cast_ulp(set->used_pmds))) {
18667 + val = pfm_arch_read_pmd(ctx, i);
18668 + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
18669 + if (!(val & wmask)) {
18670 + __set_bit(i, cast_ulp(set->povfl_pmds));
18671 + set->npend_ovfls++;
18673 + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask);
18675 + set->pmds[i].value = val;
18679 + /* 0 means: no need to save PMDs at upper level */
18684 + * pfm_p6_quiesce -- stop monitoring without grabbing any lock
18686 + * called from NMI interrupt handler to immediately stop monitoring
18687 + * cannot grab any lock, including perfmon related locks
18689 +static void __kprobes pfm_p6_quiesce(void)
18692 + * quiesce PMU by clearing available registers that have
18693 + * the start/stop capability
18695 + * P6 processors only have an enable bit on PERFEVTSEL0
18697 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
18698 + wrmsrl(MSR_P6_EVNTSEL0, 0);
18702 + * Counters have 40 bits implemented. However they are designed such
18703 + * that bits [32-39] are sign extensions of bit 31. As such the
18704 + * effective width of a counter for P6-like PMU is 31 bits only.
18706 + * See IA-32 Intel Architecture Software developer manual Vol 3B
18708 +static struct pfm_pmu_config pfm_p6_pmu_conf = {
18709 + .pmu_name = "Intel P6 processor Family",
18710 + .counter_width = 31,
18711 + .pmd_desc = pfm_p6_pmd_desc,
18712 + .pmc_desc = pfm_p6_pmc_desc,
18713 + .num_pmc_entries = PFM_P6_NUM_PMCS,
18714 + .num_pmd_entries = PFM_P6_NUM_PMDS,
18715 + .probe_pmu = pfm_p6_probe_pmu,
18716 + .version = "1.0",
18717 + .flags = PFM_PMU_BUILTIN_FLAG,
18718 + .owner = THIS_MODULE,
18719 + .pmu_info = &pfm_p6_pmu_info
18722 +static int __init pfm_p6_pmu_init_module(void)
18724 + return pfm_pmu_register(&pfm_p6_pmu_conf);
18727 +static void __exit pfm_p6_pmu_cleanup_module(void)
18729 + pfm_pmu_unregister(&pfm_p6_pmu_conf);
18732 +module_init(pfm_p6_pmu_init_module);
18733 +module_exit(pfm_p6_pmu_cleanup_module);
18734 diff --git a/arch/x86/perfmon/perfmon_pebs_core_smpl.c b/arch/x86/perfmon/perfmon_pebs_core_smpl.c
18735 new file mode 100644
18736 index 0000000..eeb9174
18738 +++ b/arch/x86/perfmon/perfmon_pebs_core_smpl.c
18741 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
18742 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
18744 + * This file implements the Precise Event Based Sampling (PEBS)
18745 + * sampling format for Intel Core and Atom processors.
18747 + * This program is free software; you can redistribute it and/or
18748 + * modify it under the terms of version 2 of the GNU General Public
18749 + * License as published by the Free Software Foundation.
18751 + * This program is distributed in the hope that it will be useful,
18752 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18753 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18754 + * General Public License for more details.
18756 + * You should have received a copy of the GNU General Public License
18757 + * along with this program; if not, write to the Free Software
18758 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18761 +#include <linux/kernel.h>
18762 +#include <linux/types.h>
18763 +#include <linux/module.h>
18764 +#include <linux/init.h>
18765 +#include <linux/smp.h>
18766 +#include <linux/perfmon_kern.h>
18768 +#include <asm/msr.h>
18769 +#include <asm/perfmon_pebs_core_smpl.h>
18771 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
18772 +MODULE_DESCRIPTION("Intel Core Precise Event-Based Sampling (PEBS)");
18773 +MODULE_LICENSE("GPL");
18775 +#define ALIGN_PEBS(a, order) \
18776 + (((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1))
18778 +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */
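+/*
+ * e.g. ALIGN_PEBS(0x12345, 8) = (0x12345 + 0xff) & ~0xff = 0x12400,
+ * i.e. the next 256-byte aligned address at or above the argument.
+ */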
18780 +static int pfm_pebs_core_fmt_validate(u32 flags, u16 npmds, void *data)
18782 + struct pfm_pebs_core_smpl_arg *arg = data;
18783 + size_t min_buf_size;
18786 + * need to define at least the size of the buffer
18788 + if (data == NULL) {
18789 + PFM_DBG("no argument passed");
18794 + * compute min buf size. npmds is the maximum number
18795 + * of implemented PMD registers.
18797 + min_buf_size = sizeof(struct pfm_pebs_core_smpl_hdr)
18798 + + sizeof(struct pfm_pebs_core_smpl_entry)
18799 + + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */
18801 + PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
18807 + * must hold at least the buffer header + one minimally sized entry
18809 + if (arg->buf_size < min_buf_size)
18815 +static int pfm_pebs_core_fmt_get_size(unsigned int flags, void *data, size_t *size)
18817 + struct pfm_pebs_core_smpl_arg *arg = data;
18820 + * size has been validated in pfm_pebs_core_fmt_validate()
18822 + *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER);
18827 +static int pfm_pebs_core_fmt_init(struct pfm_context *ctx, void *buf,
18828 + u32 flags, u16 npmds, void *data)
18830 + struct pfm_arch_context *ctx_arch;
18831 + struct pfm_pebs_core_smpl_hdr *hdr;
18832 + struct pfm_pebs_core_smpl_arg *arg = data;
18833 + u64 pebs_start, pebs_end;
18834 + struct pfm_ds_area_core *ds;
18836 + ctx_arch = pfm_ctx_arch(ctx);
18842 + * align PEBS buffer base
18844 + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER);
18845 + pebs_end = pebs_start + arg->buf_size + 1;
18847 + hdr->version = PFM_PEBS_CORE_SMPL_VERSION;
18848 + hdr->buf_size = arg->buf_size;
18849 + hdr->overflows = 0;
18852 + * express PEBS buffer base as offset from the end of the header
18854 + hdr->start_offs = pebs_start - (unsigned long)(hdr+1);
18857 + * PEBS buffer boundaries
18859 + ds->pebs_buf_base = pebs_start;
18860 + ds->pebs_abs_max = pebs_end;
18863 + * PEBS starting position
18865 + ds->pebs_index = pebs_start;
18868 + * PEBS interrupt threshold
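+ * (arg->intr_thres is expressed by the caller in number of PEBS entries;
+ * it is converted here into an absolute address inside the buffer)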
18870 + ds->pebs_intr_thres = pebs_start
18871 + + arg->intr_thres
18872 + * sizeof(struct pfm_pebs_core_smpl_entry);
18875 + * save counter reset value for PEBS counter
18877 + ds->pebs_cnt_reset = arg->cnt_reset;
18880 + * keep track of DS AREA
18882 + ctx_arch->ds_area = ds;
18883 + ctx_arch->flags.use_ds = 1;
18884 + ctx_arch->flags.use_pebs = 1;
18886 + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%llx "
18887 + "pebs_end=0x%llx ds=%p pebs_thres=0x%llx cnt_reset=0x%llx",
18889 + (unsigned long long)hdr->buf_size,
18890 + (unsigned long long)hdr->start_offs,
18891 + (unsigned long long)pebs_start,
18892 + (unsigned long long)pebs_end,
18894 + (unsigned long long)ds->pebs_intr_thres,
18895 + (unsigned long long)ds->pebs_cnt_reset);
18900 +static int pfm_pebs_core_fmt_handler(struct pfm_context *ctx,
18901 + unsigned long ip, u64 tstamp, void *data)
18903 + struct pfm_pebs_core_smpl_hdr *hdr;
18904 + struct pfm_ovfl_arg *arg;
18906 + hdr = ctx->smpl_addr;
18907 + arg = &ctx->ovfl_arg;
18909 + PFM_DBG_ovfl("buffer full");
18911 + * increment number of buffer overflows.
18912 + * important to detect duplicate set of samples.
18914 + hdr->overflows++;
18917 + * request notification and masking of monitoring.
18918 + * Notification is still subject to the overflowed
18919 + * register having the FL_NOTIFY flag set.
18921 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
18923 + return -ENOBUFS; /* we are full, sorry */
18926 +static int pfm_pebs_core_fmt_restart(int is_active, u32 *ovfl_ctrl,
18929 + struct pfm_pebs_core_smpl_hdr *hdr = buf;
18932 + * reset index to base of buffer
18934 + hdr->ds.pebs_index = hdr->ds.pebs_buf_base;
18936 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
18941 +static int pfm_pebs_core_fmt_exit(void *buf)
18946 +static struct pfm_smpl_fmt pebs_core_fmt = {
18947 + .fmt_name = PFM_PEBS_CORE_SMPL_NAME,
18948 + .fmt_version = 0x1,
18949 + .fmt_arg_size = sizeof(struct pfm_pebs_core_smpl_arg),
18950 + .fmt_validate = pfm_pebs_core_fmt_validate,
18951 + .fmt_getsize = pfm_pebs_core_fmt_get_size,
18952 + .fmt_init = pfm_pebs_core_fmt_init,
18953 + .fmt_handler = pfm_pebs_core_fmt_handler,
18954 + .fmt_restart = pfm_pebs_core_fmt_restart,
18955 + .fmt_exit = pfm_pebs_core_fmt_exit,
18956 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
18957 + .owner = THIS_MODULE,
18960 +static int __init pfm_pebs_core_fmt_init_module(void)
18962 + if (!cpu_has_pebs) {
18963 + PFM_INFO("processor does not have PEBS support");
18967 + * cpu_has_pebs is not enough to identify Intel Core PEBS
18968 + * which is different from Pentium 4 PEBS. Therefore we do
18969 + * a more detailed check here
18971 + if (current_cpu_data.x86 != 6) {
18972 + PFM_INFO("not a supported Intel processor");
18976 + switch (current_cpu_data.x86_model) {
18977 + case 15: /* Merom */
18978 + case 23: /* Penryn */
18979 + case 28: /* Atom (Silverthorne) */
18980 + case 29: /* Dunnington */
18983 + PFM_INFO("not a supported Intel processor");
18986 + return pfm_fmt_register(&pebs_core_fmt);
18989 +static void __exit pfm_pebs_core_fmt_cleanup_module(void)
18991 + pfm_fmt_unregister(&pebs_core_fmt);
18994 +module_init(pfm_pebs_core_fmt_init_module);
18995 +module_exit(pfm_pebs_core_fmt_cleanup_module);
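The validate and init callbacks above expect the caller to size the buffer for the header plus at least one entry (with 256 bytes of alignment slack) and to place the interrupt threshold a few entries short of the end. Below is a minimal sketch of filling the format argument on the user side; the helper name, sample count and period are illustrative only, and the structures are assumed to come from the exported perfmon_pebs_core_smpl.h:

    #include <string.h>
    #include <stdint.h>

    /* sketch only: populate the PEBS Core format argument */
    static void setup_pebs_core_arg(struct pfm_pebs_core_smpl_arg *arg,
                                    size_t nsamples, uint64_t period)
    {
            memset(arg, 0, sizeof(*arg));
            /* header + 256-byte alignment slack + nsamples fixed-size entries */
            arg->buf_size = sizeof(struct pfm_pebs_core_smpl_hdr)
                          + (1UL << 8)
                          + nsamples * sizeof(struct pfm_pebs_core_smpl_entry);
            /* raise the PMU interrupt a few entries before the end of the buffer */
            arg->intr_thres = nsamples - 4;
            /* reload value: the counter overflows after 'period' events */
            arg->cnt_reset = (uint64_t)-period;
    }

The argument is then passed at context creation time together with the format name PFM_PEBS_CORE_SMPL_NAME, defined in the header added later in this patch.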
18996 diff --git a/arch/x86/perfmon/perfmon_pebs_p4_smpl.c b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c
18997 new file mode 100644
18998 index 0000000..f4e9fd2
19000 +++ b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c
19003 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
19004 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19006 + * This file implements the Precise Event Based Sampling (PEBS)
19007 + * sampling format. It supports the following processors:
19008 + * - 32-bit Pentium 4 or other Netburst-based processors
19009 + * - 64-bit Pentium 4 or other Netburst-based processors
19011 + * This program is free software; you can redistribute it and/or
19012 + * modify it under the terms of version 2 of the GNU General Public
19013 + * License as published by the Free Software Foundation.
19015 + * This program is distributed in the hope that it will be useful,
19016 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19017 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19018 + * General Public License for more details.
19020 + * You should have received a copy of the GNU General Public License
19021 + * along with this program; if not, write to the Free Software
19022 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19025 +#include <linux/kernel.h>
19026 +#include <linux/types.h>
19027 +#include <linux/module.h>
19028 +#include <linux/init.h>
19029 +#include <linux/smp.h>
19030 +#include <linux/perfmon_kern.h>
19032 +#include <asm/msr.h>
19033 +#include <asm/perfmon_pebs_p4_smpl.h>
19035 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
19036 +MODULE_DESCRIPTION("Intel P4 Precise Event-Based Sampling (PEBS)");
19037 +MODULE_LICENSE("GPL");
19039 +#define ALIGN_PEBS(a, order) \
19040 +	(((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1))
19042 +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */
19044 +static int pfm_pebs_p4_fmt_validate(u32 flags, u16 npmds, void *data)
19046 + struct pfm_pebs_p4_smpl_arg *arg = data;
19047 + size_t min_buf_size;
19050 + * need to define at least the size of the buffer
19052 + if (data == NULL) {
19053 + PFM_DBG("no argument passed");
19058 + * compute min buf size. npmds is the maximum number
19059 + * of implemented PMD registers.
19061 + min_buf_size = sizeof(struct pfm_pebs_p4_smpl_hdr)
19062 + + sizeof(struct pfm_pebs_p4_smpl_entry)
19063 + + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */
19065 + PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
19071 + * must hold at least the buffer header + one minimally sized entry
19073 + if (arg->buf_size < min_buf_size)
19079 +static int pfm_pebs_p4_fmt_get_size(unsigned int flags, void *data, size_t *size)
19081 + struct pfm_pebs_p4_smpl_arg *arg = data;
19084 + * size has been validated in pfm_pebs_p4_fmt_validate()
19086 + *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER);
19091 +static int pfm_pebs_p4_fmt_init(struct pfm_context *ctx, void *buf,
19092 + u32 flags, u16 npmds, void *data)
19094 + struct pfm_arch_context *ctx_arch;
19095 + struct pfm_pebs_p4_smpl_hdr *hdr;
19096 + struct pfm_pebs_p4_smpl_arg *arg = data;
19097 + unsigned long pebs_start, pebs_end;
19098 + struct pfm_ds_area_p4 *ds;
19100 + ctx_arch = pfm_ctx_arch(ctx);
19106 + * align PEBS buffer base
19108 + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER);
19109 + pebs_end = pebs_start + arg->buf_size + 1;
19111 + hdr->version = PFM_PEBS_P4_SMPL_VERSION;
19112 + hdr->buf_size = arg->buf_size;
19113 + hdr->overflows = 0;
19116 + * express PEBS buffer base as offset from the end of the header
19118 + hdr->start_offs = pebs_start - (unsigned long)(hdr+1);
19121 + * PEBS buffer boundaries
19123 + ds->pebs_buf_base = pebs_start;
19124 + ds->pebs_abs_max = pebs_end;
19127 + * PEBS starting position
19129 + ds->pebs_index = pebs_start;
19132 + * PEBS interrupt threshold
19134 + ds->pebs_intr_thres = pebs_start
19135 + + arg->intr_thres * sizeof(struct pfm_pebs_p4_smpl_entry);
19138 + * save counter reset value for PEBS counter
19140 + ds->pebs_cnt_reset = arg->cnt_reset;
19143 + * keep track of DS AREA
19145 + ctx_arch->ds_area = ds;
19146 + ctx_arch->flags.use_pebs = 1;
19147 + ctx_arch->flags.use_ds = 1;
19149 + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%lx "
19150 + "pebs_end=0x%lx ds=%p pebs_thres=0x%lx cnt_reset=0x%llx",
19152 + (unsigned long long)hdr->buf_size,
19153 + (unsigned long long)hdr->start_offs,
19157 + ds->pebs_intr_thres,
19158 + (unsigned long long)ds->pebs_cnt_reset);
19163 +static int pfm_pebs_p4_fmt_handler(struct pfm_context *ctx,
19164 + unsigned long ip, u64 tstamp, void *data)
19166 + struct pfm_pebs_p4_smpl_hdr *hdr;
19167 + struct pfm_ovfl_arg *arg;
19169 + hdr = ctx->smpl_addr;
19170 + arg = &ctx->ovfl_arg;
19172 + PFM_DBG_ovfl("buffer full");
19174 + * increment number of buffer overflows.
19175 + * important to detect duplicate set of samples.
19177 + hdr->overflows++;
19180 + * request notification and masking of monitoring.
19181 + * Notification is still subject to the overflowed
19182 + * register having the FL_NOTIFY flag set.
19184 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
19186 + return -ENOBUFS; /* we are full, sorry */
19189 +static int pfm_pebs_p4_fmt_restart(int is_active, u32 *ovfl_ctrl,
19192 + struct pfm_pebs_p4_smpl_hdr *hdr = buf;
19195 + * reset index to base of buffer
19197 + hdr->ds.pebs_index = hdr->ds.pebs_buf_base;
19199 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
19204 +static int pfm_pebs_p4_fmt_exit(void *buf)
19209 +static struct pfm_smpl_fmt pebs_p4_fmt = {
19210 + .fmt_name = PFM_PEBS_P4_SMPL_NAME,
19211 + .fmt_version = 0x1,
19212 + .fmt_arg_size = sizeof(struct pfm_pebs_p4_smpl_arg),
19213 + .fmt_validate = pfm_pebs_p4_fmt_validate,
19214 + .fmt_getsize = pfm_pebs_p4_fmt_get_size,
19215 + .fmt_init = pfm_pebs_p4_fmt_init,
19216 + .fmt_handler = pfm_pebs_p4_fmt_handler,
19217 + .fmt_restart = pfm_pebs_p4_fmt_restart,
19218 + .fmt_exit = pfm_pebs_p4_fmt_exit,
19219 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
19220 + .owner = THIS_MODULE,
19223 +static int __init pfm_pebs_p4_fmt_init_module(void)
19227 + if (!cpu_has_pebs) {
19228 + PFM_INFO("processor does not have PEBS support");
19231 + if (current_cpu_data.x86 != 15) {
19232 + PFM_INFO("not an Intel Pentium 4");
19236 + ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map))
19237 + / current_cpu_data.x86_max_cores) > 1;
19241 + if (ht_enabled) {
19242 + PFM_INFO("PEBS not available because HyperThreading is on");
19245 + return pfm_fmt_register(&pebs_p4_fmt);
19248 +static void __exit pfm_pebs_p4_fmt_cleanup_module(void)
19250 + pfm_fmt_unregister(&pebs_p4_fmt);
19253 +module_init(pfm_pebs_p4_fmt_init_module);
19254 +module_exit(pfm_pebs_p4_fmt_cleanup_module);
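Both handlers above bump hdr->overflows each time the sampling buffer fills, precisely so that a consumer can tell whether it is looking at a set of samples it has already processed. A small sketch of that check on the consumer side; the function name and the caller's polling loop are hypothetical, only the overflows field comes from the headers in this patch:

    #include <stdint.h>

    /* returns non-zero when the kernel has reported a new buffer-full event */
    static int pebs_buffer_is_new(const struct pfm_pebs_p4_smpl_hdr *hdr,
                                  uint64_t *last_overflows)
    {
            if (hdr->overflows == *last_overflows)
                    return 0;       /* same set of samples as last time */
            *last_overflows = hdr->overflows;
            return 1;               /* fresh samples, safe to process */
    }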
19255 diff --git a/include/asm-mips/Kbuild b/include/asm-mips/Kbuild
19256 index 7897f05..7ed16fc 100644
19257 --- a/include/asm-mips/Kbuild
19258 +++ b/include/asm-mips/Kbuild
19260 include include/asm-generic/Kbuild.asm
19262 header-y += cachectl.h sgidefs.h sysmips.h
19263 +header-y += perfmon.h
19264 diff --git a/include/asm-mips/perfmon.h b/include/asm-mips/perfmon.h
19265 new file mode 100644
19266 index 0000000..7915c17
19268 +++ b/include/asm-mips/perfmon.h
19271 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
19272 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19274 + * This file contains mips64 specific definitions for the perfmon
19277 + * This file MUST never be included directly. Use linux/perfmon.h.
19279 + * This program is free software; you can redistribute it and/or
19280 + * modify it under the terms of version 2 of the GNU General Public
19281 + * License as published by the Free Software Foundation.
19283 + * This program is distributed in the hope that it will be useful,
19284 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19285 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19286 + * General Public License for more details.
19288 + * You should have received a copy of the GNU General Public License
19289 + * along with this program; if not, write to the Free Software
19290 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19293 +#ifndef _ASM_MIPS64_PERFMON_H_
19294 +#define _ASM_MIPS64_PERFMON_H_
19297 + * arch-specific user visible interface definitions
19300 +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
19301 +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
19303 +#endif /* _ASM_MIPS64_PERFMON_H_ */
19304 diff --git a/include/asm-mips/perfmon_kern.h b/include/asm-mips/perfmon_kern.h
19305 new file mode 100644
19306 index 0000000..7d213df
19308 +++ b/include/asm-mips/perfmon_kern.h
19311 + * Copyright (c) 2005 Philip Mucci.
19313 + * Based on other versions:
19314 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
19315 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19317 + * This file contains mips64 specific definitions for the perfmon
19320 + * This program is free software; you can redistribute it and/or
19321 + * modify it under the terms of version 2 of the GNU General Public
19322 + * License as published by the Free Software Foundation.
19324 + * This program is distributed in the hope that it will be useful,
19325 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19326 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19327 + * General Public License for more details.
19329 + * You should have received a copy of the GNU General Public License
19330 + * along with this program; if not, write to the Free Software
19331 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19334 +#ifndef _ASM_MIPS64_PERFMON_KERN_H_
19335 +#define _ASM_MIPS64_PERFMON_KERN_H_
19339 +#ifdef CONFIG_PERFMON
19340 +#include <linux/unistd.h>
19341 +#include <asm/cacheflush.h>
19343 +#define PFM_ARCH_PMD_STK_ARG 2
19344 +#define PFM_ARCH_PMC_STK_ARG 2
19346 +struct pfm_arch_pmu_info {
19350 +#define MIPS64_CONFIG_PMC_MASK (1 << 4)
19351 +#define MIPS64_PMC_INT_ENABLE_MASK (1 << 4)
19352 +#define MIPS64_PMC_CNT_ENABLE_MASK (0xf)
19353 +#define MIPS64_PMC_EVT_MASK (0x7 << 6)
19354 +#define MIPS64_PMC_CTR_MASK (1 << 31)
19355 +#define MIPS64_PMD_INTERRUPT (1 << 31)
19357 +/* Coprocessor register 25 contains the PMU interface. */
19358 +/* Sel 0 is control for counter 0 */
19359 +/* Sel 1 is count for counter 0. */
19360 +/* Sel 2 is control for counter 1. */
19361 +/* Sel 3 is count for counter 1. */
19365 +31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
19366 +M 0--------------------------------------------------------------0 Event-- IE U S K EXL
19368 +M 31 If this bit is one, another pair of Performance Control
19369 +and Counter registers is implemented at a MTC0
19371 +Event 8:5 Counter event enabled for this counter. Possible events
19372 +are listed in Table 6-30. R/W Undefined
19374 +IE 4 Counter Interrupt Enable. This bit masks bit 31 of the
19375 +associated count register from the interrupt exception
19376 +request output. R/W 0
19378 +U 3 Count in User Mode. When this bit is set, the specified
19379 +event is counted in User Mode. R/W Undefined
19381 +S 2 Count in Supervisor Mode. When this bit is set, the
19382 +specified event is counted in Supervisor Mode. R/W Undefined
19384 +K 1 Count in Kernel Mode. When this bit is set, count the
19385 +event in Kernel Mode when EXL and ERL both are 0. R/W Undefined
19387 +EXL 0 Count when EXL. When this bit is set, count the event
19388 +when EXL = 1 and ERL = 0. R/W Undefined
19391 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
19394 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
19395 + struct pfm_event_set *set)
19398 +static inline void pfm_arch_serialize(void)
19403 + * MIPS does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
19404 + * this routine needs to do it when switching sets on overflow
19406 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
19407 + struct pfm_event_set *set)
19409 + pfm_save_pmds(ctx, set);
19412 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
19413 + unsigned int cnum, u64 value)
19416 + * we only write to the actual register when monitoring is
19417 + * active (pfm_start was issued)
19419 + if (ctx && (ctx->flags.started == 0))
19422 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
19424 + write_c0_perfctrl0(value);
19427 + write_c0_perfctrl1(value);
19430 + write_c0_perfctrl2(value);
19433 + write_c0_perfctrl3(value);
19440 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
19441 + unsigned int cnum, u64 value)
19443 + value &= pfm_pmu_conf->ovfl_mask;
19445 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
19447 + write_c0_perfcntr0(value);
19450 + write_c0_perfcntr1(value);
19453 + write_c0_perfcntr2(value);
19456 + write_c0_perfcntr3(value);
19463 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
19465 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
19467 + return read_c0_perfcntr0();
19470 + return read_c0_perfcntr1();
19473 + return read_c0_perfcntr2();
19476 + return read_c0_perfcntr3();
19484 +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
19486 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
19488 + return read_c0_perfctrl0();
19491 + return read_c0_perfctrl1();
19494 + return read_c0_perfctrl2();
19497 + return read_c0_perfctrl3();
19506 + * For some CPUs, the upper bits of a counter must be set in order for the
19507 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
19508 + * and the upper bits are cleared. This function may be used to set them back.
19510 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
19511 + unsigned int cnum)
19514 + val = pfm_arch_read_pmd(ctx, cnum);
19515 + /* This masks out overflow bit 31 */
19516 + pfm_arch_write_pmd(ctx, cnum, val);
19520 + * At certain points, perfmon needs to know if monitoring has been
19521 + * explicitly started/stopped by the user via pfm_start/pfm_stop. The
19522 + * information is tracked in ctx.flags.started. However on certain
19523 + * architectures, it may be possible to start/stop directly from
19524 + * user level with a single assembly instruction bypassing
19525 + * the kernel. This function must be used to determine by
19526 + * an arch-specific means whether monitoring is actually started/stopped.
19528 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
19530 + return ctx->flags.started;
19533 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
19534 + struct pfm_context *ctx)
19537 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
19538 + struct pfm_context *ctx)
19541 +static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
19542 + struct pfm_context *ctx)
19544 +int pfm_arch_ctxswout_thread(struct task_struct *task,
19545 + struct pfm_context *ctx);
19547 +int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
19548 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
19549 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
19550 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
19551 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
19552 +char *pfm_arch_get_pmu_module_name(void);
19554 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
19555 + struct pfm_event_set *set)
19557 + pfm_arch_stop(current, ctx);
19559 + * we mark monitoring as stopped to avoid
19560 + * certain side effects especially in
19561 + * pfm_switch_sets_from_intr() and
19562 + * pfm_arch_restore_pmcs()
19564 + ctx->flags.started = 0;
19568 + * unfreeze PMU from pfm_do_interrupt_handler()
19569 + * ctx may be NULL for spurious
19571 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
19576 + PFM_DBG_ovfl("state=%d", ctx->state);
19578 + ctx->flags.started = 1;
19580 + if (ctx->state == PFM_CTX_MASKED)
19583 + pfm_arch_restore_pmcs(ctx, ctx->active_set);
19587 + * this function is called from the PMU interrupt handler ONLY.
19588 + * On MIPS, the PMU is frozen via arch_stop; masking would be implemented
19589 + * via arch_stop as well. Given that the PMU is already stopped when
19590 + * entering the interrupt handler, we do not need to stop it again, so
19591 + * this function is a nop.
19593 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
19594 + struct pfm_event_set *set)
19598 + * on MIPS masking/unmasking uses the start/stop mechanism, so we simply
19599 + * need to start here.
19601 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
19602 + struct pfm_event_set *set)
19604 + pfm_arch_start(current, ctx);
19607 +static inline int pfm_arch_context_create(struct pfm_context *ctx,
19613 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
19621 + * function called from pfm_setfl_sane(). Context is locked
19622 + * and interrupts are masked.
19623 + * The value of flags is the value of ctx_flags as passed by
19626 + * function must check arch-specific set flags.
19628 + * 1 when flags are valid
19632 +pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
19637 +static inline int pfm_arch_init(void)
19642 +static inline void pfm_arch_init_percpu(void)
19645 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
19650 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
19653 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
19658 +static inline void pfm_arch_pmu_release(void)
19661 +#ifdef CONFIG_PERFMON_FLUSH
19663 + * due to a cache aliasing problem on MIPS, it is necessary to flush
19664 + * pages out of the cache when they are modified.
19666 +static inline void pfm_cacheflush(void *addr, unsigned int len)
19668 + unsigned long start, end;
19670 + start = (unsigned long)addr & PAGE_MASK;
19671 + end = ((unsigned long)addr + len + PAGE_SIZE - 1) & PAGE_MASK;
19673 + while (start < end) {
19674 + flush_data_cache_page(start);
19675 + start += PAGE_SIZE;
19679 +static inline void pfm_cacheflush(void *addr, unsigned int len)
19683 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
19686 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
19689 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
19694 +static inline int pfm_arch_get_base_syscall(void)
19696 + if (test_thread_flag(TIF_32BIT_ADDR)) {
19697 + if (test_thread_flag(TIF_32BIT_REGS))
19698 + return __NR_O32_Linux+330;
19699 + return __NR_N32_Linux+293;
19701 + return __NR_64_Linux+289;
19704 +struct pfm_arch_context {
19708 +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
19710 + * MIPS may need extra alignment requirements for the sampling buffer
19712 +#ifdef CONFIG_PERFMON_SMPL_ALIGN
19713 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0x4000
19715 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
19718 +#endif /* CONFIG_PERFMON */
19720 +#endif /* __KERNEL__ */
19721 +#endif /* _ASM_MIPS64_PERFMON_KERN_H_ */
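The header above quotes the PerfCtl bit layout for the MIPS performance counters (Event in bits 8:5, IE bit 4, U bit 3, S bit 2, K bit 1, EXL bit 0). A sketch of composing the kind of control value a tool would program into a PMC; pfm_arch_write_pmc() above then forwards it to the matching perfctrl register. The event selector and the chosen mode bits are purely illustrative:

    /* count 'event_sel' in user and kernel mode, with overflow interrupt enabled */
    static inline u64 mips_perfctl_value(unsigned int event_sel)
    {
            return ((u64)event_sel << 5)    /* Event, bits 8:5 */
                 | (1UL << 4)               /* IE: counter interrupt enable */
                 | (1UL << 3)               /* U: count in user mode */
                 | (1UL << 1);              /* K: count in kernel mode */
    }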
19722 diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
19723 index a944eda..470cdfc 100644
19724 --- a/include/asm-mips/system.h
19725 +++ b/include/asm-mips/system.h
19726 @@ -67,6 +67,10 @@ do { \
19727 __mips_mt_fpaff_switch_to(prev); \
19729 __save_dsp(prev); \
19730 + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
19731 + pfm_ctxsw_out(prev, next); \
19732 + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
19733 + pfm_ctxsw_in(prev, next); \
19734 (last) = resume(prev, next, task_thread_info(next)); \
19737 diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
19738 index bb30606..34fd6aa 100644
19739 --- a/include/asm-mips/thread_info.h
19740 +++ b/include/asm-mips/thread_info.h
19741 @@ -114,6 +114,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
19742 #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
19743 #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
19744 #define TIF_SECCOMP 4 /* secure computing */
19745 +#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */
19746 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
19747 #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
19748 #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
19749 @@ -124,6 +125,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
19750 #define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */
19751 #define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */
19752 #define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */
19753 +#define TIF_PERFMON_CTXSW 25 /* perfmon needs ctxsw calls */
19754 #define TIF_SYSCALL_TRACE 31 /* syscall trace active */
19756 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
19757 @@ -140,6 +142,8 @@ register struct thread_info *__current_thread_info __asm__("$28");
19758 #define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
19759 #define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
19760 #define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
19761 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
19762 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
19764 /* work to do on interrupt/exception return */
19765 #define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP)
19766 diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h
19767 index a73e153..200f654 100644
19768 --- a/include/asm-mips/unistd.h
19769 +++ b/include/asm-mips/unistd.h
19770 @@ -350,11 +350,23 @@
19771 #define __NR_dup3 (__NR_Linux + 327)
19772 #define __NR_pipe2 (__NR_Linux + 328)
19773 #define __NR_inotify_init1 (__NR_Linux + 329)
19774 +#define __NR_pfm_create_context (__NR_Linux + 330)
19775 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
19776 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
19777 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
19778 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
19779 +#define __NR_pfm_start (__NR_pfm_create_context+5)
19780 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
19781 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
19782 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
19783 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
19784 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
19785 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
19788 * Offset of the last Linux o32 flavoured syscall
19790 -#define __NR_Linux_syscalls 329
19791 +#define __NR_Linux_syscalls 341
19793 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
19795 @@ -656,16 +668,28 @@
19796 #define __NR_dup3 (__NR_Linux + 286)
19797 #define __NR_pipe2 (__NR_Linux + 287)
19798 #define __NR_inotify_init1 (__NR_Linux + 288)
19799 +#define __NR_pfm_create_context (__NR_Linux + 289)
19800 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
19801 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
19802 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
19803 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
19804 +#define __NR_pfm_start (__NR_pfm_create_context+5)
19805 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
19806 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
19807 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
19808 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
19809 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
19810 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
19813 * Offset of the last Linux 64-bit flavoured syscall
19815 -#define __NR_Linux_syscalls 288
19816 +#define __NR_Linux_syscalls 300
19818 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
19820 #define __NR_64_Linux 5000
19821 -#define __NR_64_Linux_syscalls 288
19822 +#define __NR_64_Linux_syscalls 300
19824 #if _MIPS_SIM == _MIPS_SIM_NABI32
19826 @@ -966,16 +990,28 @@
19827 #define __NR_dup3 (__NR_Linux + 290)
19828 #define __NR_pipe2 (__NR_Linux + 291)
19829 #define __NR_inotify_init1 (__NR_Linux + 292)
19830 +#define __NR_pfm_create_context (__NR_Linux + 293)
19831 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
19832 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
19833 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
19834 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
19835 +#define __NR_pfm_start (__NR_pfm_create_context+5)
19836 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
19837 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
19838 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
19839 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
19840 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
19841 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
19844 * Offset of the last N32 flavoured syscall
19846 -#define __NR_Linux_syscalls 292
19847 +#define __NR_Linux_syscalls 304
19849 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
19851 #define __NR_N32_Linux 6000
19852 -#define __NR_N32_Linux_syscalls 292
19853 +#define __NR_N32_Linux_syscalls 304
19857 diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
19858 index 4a8e80c..d7d819e 100644
19859 --- a/include/asm-x86/Kbuild
19860 +++ b/include/asm-x86/Kbuild
19861 @@ -9,6 +9,7 @@ header-y += prctl.h
19862 header-y += ptrace-abi.h
19863 header-y += sigcontext32.h
19864 header-y += ucontext.h
19865 +header-y += perfmon.h
19866 header-y += processor-flags.h
19868 unifdef-y += e820.h
19869 diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
19870 index 61cea9e..275e015 100644
19871 --- a/include/asm-x86/ia32_unistd.h
19872 +++ b/include/asm-x86/ia32_unistd.h
19874 * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
19877 -#define __NR_ia32_restart_syscall 0
19878 -#define __NR_ia32_exit 1
19879 -#define __NR_ia32_read 3
19880 -#define __NR_ia32_write 4
19881 -#define __NR_ia32_sigreturn 119
19882 -#define __NR_ia32_rt_sigreturn 173
19883 +#define __NR_ia32_restart_syscall 0
19884 +#define __NR_ia32_exit 1
19885 +#define __NR_ia32_read 3
19886 +#define __NR_ia32_write 4
19887 +#define __NR_ia32_sigreturn 119
19888 +#define __NR_ia32_rt_sigreturn 173
19889 +#define __NR_ia32_pfm_create_context 333
19891 #endif /* _ASM_X86_64_IA32_UNISTD_H_ */
19892 diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
19893 index a48c7f2..892fe8f 100644
19894 --- a/include/asm-x86/irq_vectors.h
19895 +++ b/include/asm-x86/irq_vectors.h
19897 #define LOCAL_TIMER_VECTOR 0xef
19900 + * Perfmon PMU interrupt vector
19902 +#define LOCAL_PERFMON_VECTOR 0xee
19905 * First APIC vector available to drivers: (vectors 0x30-0xee) we
19906 * start at 0x31(0x41) to spread out vectors evenly between priority
19907 * levels. (0x80 is the syscall vector)
19908 diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
19909 index 9283b60..ac31c2d 100644
19910 --- a/include/asm-x86/mach-default/entry_arch.h
19911 +++ b/include/asm-x86/mach-default/entry_arch.h
19912 @@ -32,4 +32,8 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
19913 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
19916 +#ifdef CONFIG_PERFMON
19917 +BUILD_INTERRUPT(pmu_interrupt,LOCAL_PERFMON_VECTOR)
19921 diff --git a/include/asm-x86/perfmon.h b/include/asm-x86/perfmon.h
19922 new file mode 100644
19923 index 0000000..906f4b2
19925 +++ b/include/asm-x86/perfmon.h
19928 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
19929 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19931 + * This file contains i386/x86_64 specific definitions for the perfmon
19934 + * This file MUST never be included directly. Use linux/perfmon.h.
19936 + * This program is free software; you can redistribute it and/or
19937 + * modify it under the terms of version 2 of the GNU General Public
19938 + * License as published by the Free Software Foundation.
19940 + * This program is distributed in the hope that it will be useful,
19941 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19942 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19943 + * General Public License for more details.
19945 + * You should have received a copy of the GNU General Public License
19946 + * along with this program; if not, write to the Free Software
19947 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19950 +#ifndef _ASM_X86_PERFMON__H_
19951 +#define _ASM_X86_PERFMON__H_
19954 + * arch-specific user visible interface definitions
19957 +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
19958 +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
19960 +#endif /* _ASM_X86_PERFMON__H_ */
19961 diff --git a/include/asm-x86/perfmon_kern.h b/include/asm-x86/perfmon_kern.h
19962 new file mode 100644
19963 index 0000000..0e5d3a5
19965 +++ b/include/asm-x86/perfmon_kern.h
19968 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
19969 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19971 + * Copyright (c) 2007 Advanced Micro Devices, Inc.
19972 + * Contributed by Robert Richter <robert.richter@amd.com>
19974 + * This file contains X86 Processor Family specific definitions
19975 + * for the perfmon interface. This covers P6, Pentium M, P4/Xeon
19976 + * (32-bit and 64-bit, i.e., EM64T) and AMD X86-64.
19978 + * This program is free software; you can redistribute it and/or
19979 + * modify it under the terms of version 2 of the GNU General Public
19980 + * License as published by the Free Software Foundation.
19982 + * This program is distributed in the hope that it will be useful,
19983 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19984 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19985 + * General Public License for more details.
19987 + * You should have received a copy of the GNU General Public License
19988 + * along with this program; if not, write to the Free Software
19989 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19992 +#ifndef _ASM_X86_PERFMON_KERN_H_
19993 +#define _ASM_X86_PERFMON_KERN_H_
19995 +#ifdef CONFIG_PERFMON
19996 +#include <linux/unistd.h>
19997 +#ifdef CONFIG_4KSTACKS
19998 +#define PFM_ARCH_PMD_STK_ARG 2
19999 +#define PFM_ARCH_PMC_STK_ARG 2
20001 +#define PFM_ARCH_PMD_STK_ARG 4 /* about 700 bytes of stack space */
20002 +#define PFM_ARCH_PMC_STK_ARG 4 /* about 200 bytes of stack space */
20005 +struct pfm_arch_pmu_info {
20006 + u32 flags; /* PMU feature flags */
20008 + * mandatory model-specific callbacks
20010 + int (*stop_save)(struct pfm_context *ctx, struct pfm_event_set *set);
20011 + int (*has_ovfls)(struct pfm_context *ctx);
20012 + void (*quiesce)(void);
20015 + * optional model-specific callbacks
20017 + void (*acquire_pmu_percpu)(void);
20018 + void (*release_pmu_percpu)(void);
20019 + int (*create_context)(struct pfm_context *ctx, u32 ctx_flags);
20020 + void (*free_context)(struct pfm_context *ctx);
20021 + int (*load_context)(struct pfm_context *ctx);
20022 + void (*unload_context)(struct pfm_context *ctx);
20023 + void (*write_pmc)(struct pfm_context *ctx, unsigned int cnum, u64 value);
20024 + void (*write_pmd)(struct pfm_context *ctx, unsigned int cnum, u64 value);
20025 + u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum);
20026 + u64 (*read_pmc)(struct pfm_context *ctx, unsigned int cnum);
20027 + void (*nmi_copy_state)(struct pfm_context *ctx);
20028 + void (*restore_pmcs)(struct pfm_context *ctx,
20029 + struct pfm_event_set *set);
20030 + void (*restore_pmds)(struct pfm_context *ctx,
20031 + struct pfm_event_set *set);
20035 + * PMU feature flags
20037 +#define PFM_X86_FL_USE_NMI 0x01 /* user asking for NMI */
20038 +#define PFM_X86_FL_NO_SHARING 0x02 /* no sharing with other subsystems */
20039 +#define PFM_X86_FL_SHARING 0x04 /* PMU is being shared */
20041 +struct pfm_x86_ctx_flags {
20042 + unsigned int insecure:1; /* rdpmc per-thread self-monitoring */
20043 + unsigned int use_pebs:1; /* PEBS used */
20044 + unsigned int use_ds:1; /* DS used */
20045 + unsigned int reserved:29; /* for future use */
20048 +struct pfm_arch_context {
20049 + u64 saved_real_iip; /* instr pointer of last NMI intr */
20050 + struct pfm_x86_ctx_flags flags; /* flags */
20051 + void *ds_area; /* address of DS area (to go away) */
20052 + void *data; /* model-specific data */
20056 + * functions implemented as inline on x86
20060 + * pfm_arch_write_pmc - write a single PMC register
20061 + * @ctx: context to work on
20062 + * @cnum: PMC index
20063 + * @value: PMC 64-bit value
20065 + * in certain situations, ctx may be NULL
20067 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
20068 + unsigned int cnum, u64 value)
20070 + struct pfm_arch_pmu_info *pmu_info;
20072 + pmu_info = pfm_pmu_info();
20075 + * we only write to the actual register when monitoring is
20076 + * active (pfm_start was issued)
20078 + if (ctx && ctx->flags.started == 0)
20082 + * model-specific override, if any
20084 + if (pmu_info->write_pmc) {
20085 + pmu_info->write_pmc(ctx, cnum, value);
20089 + PFM_DBG_ovfl("pfm_arch_write_pmc(0x%lx, 0x%Lx)",
20090 + pfm_pmu_conf->pmc_desc[cnum].hw_addr,
20091 + (unsigned long long) value);
20093 + wrmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
20097 + * pfm_arch_write_pmd - write a single PMD register
20098 + * @ctx: context to work on
20099 + * @cnum: PMD index
20100 + * @value: PMD 64-bit value
20102 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
20103 + unsigned int cnum, u64 value)
20105 + struct pfm_arch_pmu_info *pmu_info;
20107 + pmu_info = pfm_pmu_info();
20110 + * to make sure the counter overflows, we set the
20111 + * upper bits. We also clear any other unimplemented
20112 + * bits, as this may cause a crash on some processors.
20114 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
20115 + value = (value | ~pfm_pmu_conf->ovfl_mask)
20116 + & ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
20118 + PFM_DBG_ovfl("pfm_arch_write_pmd(0x%lx, 0x%Lx)",
20119 + pfm_pmu_conf->pmd_desc[cnum].hw_addr,
20120 + (unsigned long long) value);
20123 + * model-specific override, if any
20125 + if (pmu_info->write_pmd) {
20126 + pmu_info->write_pmd(ctx, cnum, value);
20130 + wrmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
20134 + * pfm_arch_read_pmd - read a single PMD register
20135 + * @ctx: context to work on
20136 + * @cnum: PMD index
20138 + * return value is register 64-bit value
20140 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
20142 + struct pfm_arch_pmu_info *pmu_info;
20145 + pmu_info = pfm_pmu_info();
20148 + * model-specific override, if any
20150 + if (pmu_info->read_pmd)
20151 + tmp = pmu_info->read_pmd(ctx, cnum);
20153 + rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp);
20155 + PFM_DBG_ovfl("pfm_arch_read_pmd(0x%lx) = 0x%Lx",
20156 + pfm_pmu_conf->pmd_desc[cnum].hw_addr,
20157 + (unsigned long long) tmp);
20162 + * pfm_arch_read_pmc - read a single PMC register
20163 + * @ctx: context to work on
20164 + * @cnum: PMC index
20166 + * return value is register 64-bit value
20168 +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
20170 + struct pfm_arch_pmu_info *pmu_info;
20173 + pmu_info = pfm_pmu_info();
20176 + * model-specific override, if any
20178 + if (pmu_info->read_pmc)
20179 + tmp = pmu_info->read_pmc(ctx, cnum);
20181 + rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp);
20183 + PFM_DBG_ovfl("pfm_arch_read_pmc(0x%lx) = 0x%016Lx",
20184 + pfm_pmu_conf->pmc_desc[cnum].hw_addr,
20185 + (unsigned long long) tmp);
20190 + * pfm_arch_is_active - return non-zero if monitoring has been started
20191 + * @ctx: context to check
20193 + * At certain points, perfmon needs to know if monitoring has been
20194 + * explicitly started.
20196 + * On x86, there is no other way but to use pfm_start/pfm_stop
20197 + * to activate monitoring, thus we can simply check flags.started
20199 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
20201 + return ctx->flags.started;
20206 + * pfm_arch_unload_context - detach context from thread or CPU
20207 + * @ctx: context to detach
20209 + * in system-wide ctx->task is NULL, otherwise it points to the
20210 + * attached thread
20212 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
20214 + struct pfm_arch_pmu_info *pmu_info;
20215 + struct pfm_arch_context *ctx_arch;
20217 + ctx_arch = pfm_ctx_arch(ctx);
20218 + pmu_info = pfm_pmu_info();
20220 + if (ctx_arch->flags.insecure) {
20221 + PFM_DBG("clear cr4.pce");
20222 + clear_in_cr4(X86_CR4_PCE);
20225 + if (pmu_info->unload_context)
20226 + pmu_info->unload_context(ctx);
20230 + * pfm_arch_load_context - attach context to thread or CPU
20231 + * @ctx: context to attach
20233 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
20235 + struct pfm_arch_pmu_info *pmu_info;
20236 + struct pfm_arch_context *ctx_arch;
20239 + ctx_arch = pfm_ctx_arch(ctx);
20240 + pmu_info = pfm_pmu_info();
20243 + * RDPMC authorized in system-wide and
20244 + * per-thread self-monitoring.
20246 + * RDPMC only gives access to counts.
20248 + * The context-switch code does not restore
20249 + * all the PMD registers (optimization), thus there
20250 + * is a possible leak of counts there in per-thread
20253 + if (ctx->task == current || ctx->flags.system) {
20254 + PFM_DBG("set cr4.pce");
20255 + set_in_cr4(X86_CR4_PCE);
20256 + ctx_arch->flags.insecure = 1;
20259 + if (pmu_info->load_context)
20260 + ret = pmu_info->load_context(ctx);
20265 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
20266 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
20267 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
20270 + * pfm_arch_unmask_monitoring - unmask monitoring
20271 + * @ctx: context to unmask
20272 + * @set: current event set
20274 + * masking is slightly different from stopping in that it does not undo
20275 + * the pfm_start() issued by the user. This is used in conjunction with
20276 + * sampling. Masking means monitoring is stopped, but the user is not
20277 + * allowed to issue pfm_start/pfm_stop during that time. Unmasking is
20278 + * achieved via pfm_restart() and may also depend on the sampling format used.
20280 + * on x86, masking/unmasking uses the start/stop mechanism, except
20281 + * that flags.started is not modified.
20283 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
20284 + struct pfm_event_set *set)
20286 + pfm_arch_start(current, ctx);
20290 + * pfm_arch_intr_freeze_pmu - stop monitoring when handling PMU interrupt
20291 + * @ctx: current context
20292 + * @set: current event set
20294 + * called from __pfm_interrupt_handler().
20295 + * ctx is not NULL. ctx is locked. interrupts are masked
20297 + * The following actions must take place:
20298 + * - stop all monitoring to ensure handler has consistent view.
20299 + * - collect overflowed PMDs bitmask into povfls_pmds and
20300 + * npend_ovfls. If no interrupt detected then npend_ovfls
20301 + * must be set to zero.
20303 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
20304 + struct pfm_event_set *set)
20307 + * on X86, freezing is equivalent to stopping
20309 + pfm_arch_stop(current, ctx);
20312 + * we mark monitoring as stopped to avoid
20313 + * certain side effects especially in
20314 + * pfm_switch_sets_from_intr() and
20315 + * pfm_arch_restore_pmcs()
20317 + ctx->flags.started = 0;
20321 + * pfm_arch_intr_unfreeze_pmu - conditionally reactivate monitoring
20322 + * @ctx: current context
20324 + * current context may be NULL when dealing with spurious interrupts
20326 + * Must re-activate monitoring if context is not MASKED.
20327 + * interrupts are masked.
20329 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
20334 + PFM_DBG_ovfl("state=%d", ctx->state);
20337 + * restore flags.started which is cleared in
20338 + * pfm_arch_intr_freeze_pmu()
20340 + ctx->flags.started = 1;
20342 + if (ctx->state == PFM_CTX_MASKED)
20345 + pfm_arch_restore_pmcs(ctx, ctx->active_set);
20349 + * pfm_arch_setfl_sane - check arch/model specific event set flags
20350 + * @ctx: context to work on
20351 + * @flags: event set flags as passed by user
20353 + * called from pfm_setfl_sane(). Context is locked. Interrupts are masked.
20356 + * 0 when flags are valid
20359 +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
20365 + * pfm_arch_ovfl_reset_pmd - reset pmd on overflow
20366 + * @ctx: current context
20367 + * @cnum: PMD index
20369 + * On some CPUs, the upper bits of a counter must be set in order for the
20370 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
20371 + * and the upper bits are cleared. This function may be used to set them back.
20373 + * For x86, the current version loses whatever remains in the counter,
20374 + * which is usually a small count. In order not to lose this count,
20375 + * we do a read-modify-write to set the upper bits while preserving the
20376 + * low-order bits. This is slow but works.
20378 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum)
20381 + val = pfm_arch_read_pmd(ctx, cnum);
20382 + pfm_arch_write_pmd(ctx, cnum, val);
20386 + * pfm_arch_context_create - create context
20387 + * @ctx: newly created context
20388 + * @flags: context flags as passed by user
20390 + * called from __pfm_create_context()
20392 +static inline int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
20394 + struct pfm_arch_pmu_info *pmu_info;
20396 + pmu_info = pfm_pmu_info();
20398 + if (pmu_info->create_context)
20399 + return pmu_info->create_context(ctx, ctx_flags);
20405 + * pfm_arch_context_free - free context
20406 + * @ctx: context to free
20408 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
20410 + struct pfm_arch_pmu_info *pmu_info;
20412 + pmu_info = pfm_pmu_info();
20414 + if (pmu_info->free_context)
20415 + pmu_info->free_context(ctx);
20419 + * pfm_arch_clear_pmd_ovfl_cond - alter the pmds in such a way that they
20420 + * will not cause interrupts when unused.
20422 + * This is a nop on x86
20424 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
20425 + struct pfm_event_set *set)
20429 + * functions implemented in arch/x86/perfmon/perfmon.c
20431 +int pfm_arch_init(void);
20432 +void pfm_arch_resend_irq(struct pfm_context *ctx);
20434 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
20435 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
20437 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
20438 +int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg);
20439 +void pfm_arch_pmu_config_remove(void);
20440 +char *pfm_arch_get_pmu_module_name(void);
20441 +int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds);
20442 +void pfm_arch_pmu_release(void);
20445 + * pfm_arch_serialize - make PMU modifications visible to subsequent instructions
20447 + * This is a nop on x86
20449 +static inline void pfm_arch_serialize(void)
20453 + * on x86, the PMDs are already saved by pfm_arch_intr_freeze_pmu()
20454 + * when entering the PMU interrupt handler, thus, we do not need
20455 + * to save them again in pfm_switch_sets_from_intr()
20457 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
20458 + struct pfm_event_set *set)
20462 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
20463 + struct pfm_context *ctx)
20466 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
20467 + struct pfm_context *ctx)
20470 +static inline void pfm_arch_init_percpu(void)
20473 +static inline void pfm_cacheflush(void *addr, unsigned int len)
20477 + * this function is called from the PMU interrupt handler ONLY.
20478 + * On x86, the PMU is frozen via arch_stop; masking would be implemented
20479 + * via arch_stop as well. Given that the PMU is already stopped when
20480 + * entering the interrupt handler, we do not need to stop it again, so
20481 + * this function is a nop.
20483 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
20484 + struct pfm_event_set *set)
20488 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
20491 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
20494 +static inline int pfm_arch_get_base_syscall(void)
20497 + /* 32-bit syscall definition coming from ia32_unistd.h */
20498 + if (test_thread_flag(TIF_IA32))
20499 + return __NR_ia32_pfm_create_context;
20501 + return __NR_pfm_create_context;
20504 +#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
20506 + * x86 does not need extra alignment requirements for the sampling buffer
20508 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
20510 +asmlinkage void pmu_interrupt(void);
20512 +#endif /* CONFIG_PERFMON */
20514 +#endif /* _ASM_X86_PERFMON_KERN_H_ */
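The struct pfm_arch_pmu_info defined above is what every x86 model-specific PMU description fills in; only stop_save, has_ovfls and quiesce are mandatory, and the read/write callbacks, when left NULL, fall back to the plain wrmsrl/rdmsrl path shown in the inlines above. A hypothetical skeleton, with the bodies reduced to comments, just to show the expected shape:

    static int example_stop_save(struct pfm_context *ctx,
                                 struct pfm_event_set *set)
    {
            /* stop all counters and record which PMDs overflowed for this set */
            return 0;
    }

    static int example_has_ovfls(struct pfm_context *ctx)
    {
            /* return non-zero if any counter has a pending overflow */
            return 0;
    }

    static void example_quiesce(void)
    {
            /* silence the PMU on this CPU, e.g. clear the global enable bits */
    }

    static struct pfm_arch_pmu_info example_pmu_info = {
            .stop_save = example_stop_save,
            .has_ovfls = example_has_ovfls,
            .quiesce   = example_quiesce,
            /* optional callbacks (write_pmc, read_pmd, ...) left NULL */
    };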
20515 diff --git a/include/asm-x86/perfmon_pebs_core_smpl.h b/include/asm-x86/perfmon_pebs_core_smpl.h
20516 new file mode 100644
20517 index 0000000..4a12e0d
20519 +++ b/include/asm-x86/perfmon_pebs_core_smpl.h
20522 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
20523 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
20525 + * This program is free software; you can redistribute it and/or
20526 + * modify it under the terms of version 2 of the GNU General Public
20527 + * License as published by the Free Software Foundation.
20529 + * This program is distributed in the hope that it will be useful,
20530 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20531 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20532 + * General Public License for more details.
20534 + * You should have received a copy of the GNU General Public License
20535 + * along with this program; if not, write to the Free Software
20536 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20539 + * This file implements the sampling format to support Intel
20540 + * Precise Event Based Sampling (PEBS) feature of Intel Core
20541 + * processors, such as Intel Core 2.
20545 + * This is a hardware feature to enhance sampling by providing
20546 + * better precision as to where a sample is taken. This avoids the
20547 + * typical skew in the instruction one can observe with any
20548 + * interrupt-based sampling technique.
20550 + * PEBS also lowers sampling overhead significantly by having the
20551 + * processor store samples instead of the OS. PMU interrupts are only
20552 + * generated after multiple samples are written.
20554 + * Another benefit of PEBS is that samples can be captured inside
20555 + * critical sections where interrupts are masked.
20557 + * How does it work?
20558 + * PEBS effectively implements a HW buffer. The OS must pass a region
20559 + * of memory where samples are to be stored. The region can have any
20560 + * size. The OS must also specify the sampling period to reload. The PMU
20561 + * will interrupt when it reaches the end of the buffer or a specified
20562 + * threshold location inside the memory region.
20564 + * The description of the buffer is stored in the Data Save Area (DS).
20565 + * The samples are stored sequentially in the buffer. The format of the
20566 + * buffer is fixed and specified in the PEBS documentation. The sample
20567 + * format does not change between 32-bit and 64-bit modes unlike on the
20568 + * Pentium 4 version of PEBS.
20570 + * PEBS does not work when HyperThreading is enabled due to certain MSRs
20571 + * being shared between the two threads.
20573 + * What does the format do?
20574 + * It provides access to the PEBS feature for both 32-bit and 64-bit
20575 + * processors that support it.
20577 + * The same code and data structures are used for both 32-bit and 64-bit
20578 + * modes. A single format name is used for both modes. In 32-bit mode,
20579 + * some of the extended registers are written to zero in each sample.
20581 + * It is important to realize that the format provides a zero-copy
20582 + * environment for the samples, i.e., the OS never touches the
20583 + * samples. Whatever the processor writes is directly accessible to
20586 + * Parameters to the buffer can be passed via pfm_create_context() in
20587 + * the pfm_pebs_smpl_arg structure.
20589 +#ifndef __PERFMON_PEBS_CORE_SMPL_H__
20590 +#define __PERFMON_PEBS_CORE_SMPL_H__ 1
20593 + * The 32-bit and 64-bit formats are identical, thus we use only
20594 + * one name for the format.
20596 +#define PFM_PEBS_CORE_SMPL_NAME "pebs_core"
20599 + * format specific parameters (passed at context creation)
20601 + * intr_thres: index from start of buffer of entry where the
20602 + * PMU interrupt must be triggered. It must be several samples
20603 + * short of the end of the buffer.
20605 +struct pfm_pebs_core_smpl_arg {
20606 + u64 cnt_reset; /* counter reset value */
20607 + size_t buf_size; /* size of the PEBS buffer in bytes */
20608 + size_t intr_thres;/* index of PEBS interrupt threshold entry */
20609 + u64 reserved[6]; /* for future use */
20613 + * Data Save Area (32 and 64-bit mode)
20615 + * The DS area is exposed to the user. To determine the number
20616 + * of samples available in PEBS, it is necessary to subtract
20617 + * pebs_buf_base from pebs_index.
20619 + * Layout of the structure is mandated by hardware and specified
20620 + * in the Intel documentation.
20622 +struct pfm_ds_area_core {
20623 + u64 bts_buf_base;
20626 + u64 bts_intr_thres;
20627 + u64 pebs_buf_base;
20629 + u64 pebs_abs_max;
20630 + u64 pebs_intr_thres;
20631 + u64 pebs_cnt_reset;
20635 + * This header is at the beginning of the sampling buffer returned to the user.
20637 + * Because of PEBS alignment constraints, the actual PEBS buffer area does
20638 + * not necessarily begin right after the header. The start_offs field must be
20639 + * used to compute the first byte of the buffer. The offset is defined as
20640 + * the number of bytes between the end of the header and the beginning of
20641 + * the buffer. As such the formula is:
20642 + * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
20644 +struct pfm_pebs_core_smpl_hdr {
20645 + u64 overflows; /* #overflows for buffer */
20646 + size_t buf_size; /* bytes in the buffer */
20647 + size_t start_offs; /* actual buffer start offset */
20648 + u32 version; /* smpl format version */
20649 + u32 reserved1; /* for future use */
20650 + u64 reserved2[5]; /* for future use */
20651 + struct pfm_ds_area_core ds; /* data save area */
20655 + * Sample format as mandated by Intel documentation.
20656 + * The same format is used in both 32 and 64 bit modes.
20658 +struct pfm_pebs_core_smpl_entry {
20669 + u64 r8; /* 0 in 32-bit mode */
20670 + u64 r9; /* 0 in 32-bit mode */
20671 + u64 r10; /* 0 in 32-bit mode */
20672 + u64 r11; /* 0 in 32-bit mode */
20673 + u64 r12; /* 0 in 32-bit mode */
20674 + u64 r13; /* 0 in 32-bit mode */
20675 + u64 r14; /* 0 in 32-bit mode */
20676 + u64 r15; /* 0 in 32-bit mode */
20679 +#define PFM_PEBS_CORE_SMPL_VERSION_MAJ 1U
20680 +#define PFM_PEBS_CORE_SMPL_VERSION_MIN 0U
20681 +#define PFM_PEBS_CORE_SMPL_VERSION (((PFM_PEBS_CORE_SMPL_VERSION_MAJ&0xffff)<<16)|\
20682 + (PFM_PEBS_CORE_SMPL_VERSION_MIN & 0xffff))
20684 +#endif /* __PERFMON_PEBS_CORE_SMPL_H__ */
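Given the header and DS area layouts defined above, a consumer locates the first sample via start_offs and derives the number of valid entries from how far pebs_index has advanced past pebs_buf_base. A minimal sketch, assuming the sampling buffer has already been mapped into the monitoring tool; the function name is illustrative:

    #include <stdint.h>
    #include <stddef.h>

    static void walk_pebs_core_buffer(struct pfm_pebs_core_smpl_hdr *hdr)
    {
            struct pfm_pebs_core_smpl_entry *ent;
            uint64_t nsamples, i;

            /* first sample starts start_offs bytes after the end of the header */
            ent = (struct pfm_pebs_core_smpl_entry *)
                  ((unsigned long)(hdr + 1) + hdr->start_offs);

            /* number of entries recorded so far by the CPU */
            nsamples = (hdr->ds.pebs_index - hdr->ds.pebs_buf_base)
                     / sizeof(struct pfm_pebs_core_smpl_entry);

            for (i = 0; i < nsamples; i++, ent++) {
                    /* process *ent here (register snapshot at the sampled event) */
            }
    }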
20685 diff --git a/include/asm-x86/perfmon_pebs_p4_smpl.h b/include/asm-x86/perfmon_pebs_p4_smpl.h
20686 new file mode 100644
20687 index 0000000..26b51b4
20689 +++ b/include/asm-x86/perfmon_pebs_p4_smpl.h
20692 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
20693 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
20695 + * This program is free software; you can redistribute it and/or
20696 + * modify it under the terms of version 2 of the GNU General Public
20697 + * License as published by the Free Software Foundation.
20699 + * This program is distributed in the hope that it will be useful,
20700 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20701 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20702 + * General Public License for more details.
20704 + * You should have received a copy of the GNU General Public License
20705 + * along with this program; if not, write to the Free Software
20706 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20709 + * This file implements the sampling format to support Intel
20710 + * Precise Event Based Sampling (PEBS) feature of Pentium 4
20711 + * and other Netburst-based processors. Not to be used for
20712 + * Intel Core-based processors.
20716 + * This is a hardware feature to enhance sampling by providing
20717 + * better precision as to where a sample is taken. This avoids the
20718 + * typical skew in the instruction one can observe with any
20719 + * interrupt-based sampling technique.
20721 + * PEBS also lowers sampling overhead significantly by having the
20722 + * processor store samples instead of the OS. PMU interrupts are only
20723 + * generated after multiple samples are written.
20725 + * Another benefit of PEBS is that samples can be captured inside
20726 + * critical sections where interrupts are masked.
20728 + * How does it work?
20729 + * PEBS effectively implements a HW buffer. The OS must pass a region
20730 + * of memory where samples are to be stored. The region can have any
20731 + * size. The OS must also specify the sampling period to reload. The PMU
20732 + * will interrupt when it reaches the end of the buffer or a specified
20733 + * threshold location inside the memory region.
20735 + * The description of the buffer is stored in the Data Save Area (DS).
20736 + * The samples are stored sequentially in the buffer. The format of the
20737 + * buffer is fixed and specified in the PEBS documentation. The sample
20738 + * format changes between 32-bit and 64-bit modes due to extended register
20741 + * PEBS does not work when HyperThreading is enabled due to certain MSRs
20742 + * being shared between the two threads.
20744 + * What does the format do?
20745 + * It provides access to the PEBS feature for both 32-bit and 64-bit
20746 + * processors that support it.
20748 + * The same code is used for both 32-bit and 64-bit modes, but different
20749 + * format names are used because the two modes are not compatible due to
20750 + * data model and register file differences. Similarly the public data
20751 + * structures describing the samples are different.
20753 + * It is important to realize that the format provides a zero-copy environment
20754 + * for the samples, i.e., the OS never touches the samples. Whatever the
20755 + * processor writes is directly accessible to the user.
20757 + * Parameters to the buffer can be passed via pfm_create_context() in
20758 + * the pfm_pebs_smpl_arg structure.
20760 + * It is not possible to mix a 32-bit PEBS application on top of a 64-bit
20763 +#ifndef __PERFMON_PEBS_P4_SMPL_H__
20764 +#define __PERFMON_PEBS_P4_SMPL_H__ 1
20768 + * The 32-bit and 64-bit formats are not compatible, thus we have
20769 + * two different identifications so that 32-bit programs running on
20770 + * a 64-bit OS will fail to use the 64-bit PEBS support.
20772 +#define PFM_PEBS_P4_SMPL_NAME "pebs32_p4"
20774 +#define PFM_PEBS_P4_SMPL_NAME "pebs64_p4"
20778 + * format specific parameters (passed at context creation)
20780 + * intr_thres: index, counted from the start of the buffer, of the entry
20781 + * at which the PMU interrupt must be triggered. It must be several
20782 + * samples short of the end of the buffer.
20784 +struct pfm_pebs_p4_smpl_arg {
20785 + u64 cnt_reset; /* counter reset value */
20786 + size_t buf_size; /* size of the PEBS buffer in bytes */
20787 + size_t intr_thres;/* index of PEBS interrupt threshold entry */
20788 + u64 reserved[6]; /* for future use */
20792 + * Data Save Area (32 and 64-bit mode)
20794 + * The DS area must be exposed to the user because this is the only
20795 + * way to report the number of valid entries recorded by the CPU.
20796 + * This is required when the buffer is not full, i.e., there was not
20799 + * Layout of the structure is mandated by hardware and specified in
20800 + * the Intel documentation.
20802 +struct pfm_ds_area_p4 {
20803 + unsigned long bts_buf_base;
20804 + unsigned long bts_index;
20805 + unsigned long bts_abs_max;
20806 + unsigned long bts_intr_thres;
20807 + unsigned long pebs_buf_base;
20808 + unsigned long pebs_index;
20809 + unsigned long pebs_abs_max;
20810 + unsigned long pebs_intr_thres;
20811 + u64 pebs_cnt_reset;
20815 + * This header is at the beginning of the sampling buffer returned to the user.
20818 + * Because of PEBS alignment constraints, the actual PEBS buffer area does
20819 + * not necessarily begin right after the header. The start_offs field must be
20820 + * used to compute the first byte of the buffer. The offset is defined as
20821 + * the number of bytes between the end of the header and the beginning of
20822 + * the buffer. As such the formula is:
20823 + * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
20824 +struct pfm_pebs_p4_smpl_hdr {
20825 + u64 overflows; /* #overflows for buffer */
20826 + size_t buf_size; /* bytes in the buffer */
20827 + size_t start_offs; /* actual buffer start offset */
20828 + u32 version; /* smpl format version */
20829 + u32 reserved1; /* for future use */
20830 + u64 reserved2[5]; /* for future use */
20831 + struct pfm_ds_area_p4 ds; /* data save area */
20835 + * 64-bit PEBS record format is described in
20836 + * http://www.intel.com/technology/64bitextensions/30083502.pdf
20838 + * The format does not peek at samples. The sample structure is only
20839 + * used to ensure that the buffer is large enough to accommodate one
20843 +struct pfm_pebs_p4_smpl_entry {
20856 +struct pfm_pebs_p4_smpl_entry {
20878 +#define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U
20879 +#define PFM_PEBS_P4_SMPL_VERSION_MIN 0U
20880 +#define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\
20881 + (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff))
20883 +#endif /* __PERFMON_PEBS_P4_SMPL_H__ */
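
For illustration only (not part of the patch): a sketch of how the format argument above could be filled before being passed to pfm_create_context(). The counter reload value is a placeholder; the point is that intr_thres is an entry index kept several samples short of the end of the region, as the comment requires.

#include <string.h>
#include <stdint.h>

static void setup_pebs_p4_arg(struct pfm_pebs_p4_smpl_arg *arg,
			      size_t nentries, uint64_t cnt_reset)
{
	memset(arg, 0, sizeof(*arg));
	arg->cnt_reset = cnt_reset;	/* counter reload value (placeholder) */
	arg->buf_size  = nentries * sizeof(struct pfm_pebs_p4_smpl_entry);
	/* trigger the PMU interrupt a few entries before the region is full */
	arg->intr_thres = nentries > 4 ? nentries - 4 : 1;
}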
20884 diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
20885 index da0a675..b3a6ae9 100644
20886 --- a/include/asm-x86/thread_info.h
20887 +++ b/include/asm-x86/thread_info.h
20888 @@ -71,6 +71,7 @@ struct thread_info {
20889 * Warning: layout of LSW is hardcoded in entry.S
20891 #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
20892 +#define TIF_PERFMON_WORK 1 /* work for pfm_handle_work() */
20893 #define TIF_SIGPENDING 2 /* signal pending */
20894 #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
20895 #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
20896 @@ -91,6 +92,7 @@ struct thread_info {
20897 #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
20898 #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
20899 #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
20900 +#define TIF_PERFMON_CTXSW 28 /* perfmon needs ctxsw calls */
20902 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
20903 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
20904 @@ -112,6 +114,8 @@ struct thread_info {
20905 #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
20906 #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
20907 #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
20908 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
20909 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
20911 /* work to do in syscall_trace_enter() */
20912 #define _TIF_WORK_SYSCALL_ENTRY \
20913 @@ -133,12 +137,12 @@ struct thread_info {
20915 /* Only used for 64 bit */
20916 #define _TIF_DO_NOTIFY_MASK \
20917 - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
20918 + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERFMON_WORK)
20920 /* flags to check in __switch_to() */
20921 #define _TIF_WORK_CTXSW \
20922 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \
20924 + _TIF_NOTSC|_TIF_PERFMON_CTXSW)
20926 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
20927 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
20928 diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
20929 index d739467..5d8cca1 100644
20930 --- a/include/asm-x86/unistd_32.h
20931 +++ b/include/asm-x86/unistd_32.h
20932 @@ -338,9 +338,23 @@
20933 #define __NR_dup3 330
20934 #define __NR_pipe2 331
20935 #define __NR_inotify_init1 332
20936 +#define __NR_pfm_create_context 333
20937 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
20938 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
20939 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
20940 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
20941 +#define __NR_pfm_start (__NR_pfm_create_context+5)
20942 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
20943 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
20944 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
20945 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
20946 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
20947 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
20951 +#define NR_syscalls 345
20953 #define __ARCH_WANT_IPC_PARSE_VERSION
20954 #define __ARCH_WANT_OLD_READDIR
20955 #define __ARCH_WANT_OLD_STAT
20956 diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
20957 index 3a341d7..75dac98 100644
20958 --- a/include/asm-x86/unistd_64.h
20959 +++ b/include/asm-x86/unistd_64.h
20960 @@ -653,7 +653,30 @@ __SYSCALL(__NR_dup3, sys_dup3)
20961 __SYSCALL(__NR_pipe2, sys_pipe2)
20962 #define __NR_inotify_init1 294
20963 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
20965 +#define __NR_pfm_create_context 295
20966 +__SYSCALL(__NR_pfm_create_context, sys_pfm_create_context)
20967 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
20968 +__SYSCALL(__NR_pfm_write_pmcs, sys_pfm_write_pmcs)
20969 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
20970 +__SYSCALL(__NR_pfm_write_pmds, sys_pfm_write_pmds)
20971 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
20972 +__SYSCALL(__NR_pfm_read_pmds, sys_pfm_read_pmds)
20973 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
20974 +__SYSCALL(__NR_pfm_load_context, sys_pfm_load_context)
20975 +#define __NR_pfm_start (__NR_pfm_create_context+5)
20976 +__SYSCALL(__NR_pfm_start, sys_pfm_start)
20977 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
20978 +__SYSCALL(__NR_pfm_stop, sys_pfm_stop)
20979 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
20980 +__SYSCALL(__NR_pfm_restart, sys_pfm_restart)
20981 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
20982 +__SYSCALL(__NR_pfm_create_evtsets, sys_pfm_create_evtsets)
20983 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
20984 +__SYSCALL(__NR_pfm_getinfo_evtsets, sys_pfm_getinfo_evtsets)
20985 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
20986 +__SYSCALL(__NR_pfm_delete_evtsets, sys_pfm_delete_evtsets)
20987 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
20988 +__SYSCALL(__NR_pfm_unload_context, sys_pfm_unload_context)
20991 #define __ARCH_WANT_OLD_READDIR
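
For illustration only (not part of the patch): with the patched kernel headers installed, a self-monitoring session can be driven through raw syscall(2) invocations of the numbers defined above, since no libc wrappers exist. This sketch follows the three-argument sys_pfm_create_context() declaration added to syscalls.h below; the PMC/PMD programming step via pfm_write_pmcs()/pfm_write_pmds() and all error handling are elided.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perfmon.h>

static int pfm_self_session(void)
{
	struct pfarg_ctx ctx;
	struct pfarg_load load;
	int fd;

	memset(&ctx, 0, sizeof(ctx));
	memset(&load, 0, sizeof(load));

	/* create a per-thread context: returns a file descriptor */
	fd = syscall(__NR_pfm_create_context, &ctx, NULL, 0);
	if (fd < 0)
		return -1;

	load.load_pid = getpid();	/* self-monitoring; use gettid() in threads */
	load.load_set = 0;		/* start from event set 0 */
	syscall(__NR_pfm_load_context, fd, &load);
	syscall(__NR_pfm_start, fd, NULL);
	return fd;
}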
20992 diff --git a/include/linux/Kbuild b/include/linux/Kbuild
20993 index b68ec09..d37036a 100644
20994 --- a/include/linux/Kbuild
20995 +++ b/include/linux/Kbuild
20996 @@ -162,6 +162,8 @@ header-y += video_decoder.h
20997 header-y += video_encoder.h
20998 header-y += videotext.h
21000 +header-y += perfmon.h
21001 +header-y += perfmon_dfl_smpl.h
21003 unifdef-y += acct.h
21005 diff --git a/include/linux/perfmon.h b/include/linux/perfmon.h
21006 new file mode 100644
21007 index 0000000..5d9b977
21009 +++ b/include/linux/perfmon.h
21012 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
21013 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21015 + * This program is free software; you can redistribute it and/or
21016 + * modify it under the terms of version 2 of the GNU General Public
21017 + * License as published by the Free Software Foundation.
21019 + * This program is distributed in the hope that it will be useful,
21020 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21021 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21022 + * General Public License for more details.
21024 + * You should have received a copy of the GNU General Public License
21025 + * along with this program; if not, write to the Free Software
21026 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21030 +#ifndef __LINUX_PERFMON_H__
21031 +#define __LINUX_PERFMON_H__
21034 + * This file contains all the user visible generic definitions for the
21035 + * interface. Model-specific user-visible definitions are located in
21036 + * the asm/perfmon.h file.
21040 + * include arch-specific user interface definitions
21042 +#include <asm/perfmon.h>
21045 + * defined by each arch
21047 +#define PFM_MAX_PMCS PFM_ARCH_MAX_PMCS
21048 +#define PFM_MAX_PMDS PFM_ARCH_MAX_PMDS
21051 + * number of elements for each type of bitvector
21052 + * all bitvectors use the fixed-size u64 type on all architectures.
21054 +#define PFM_BVSIZE(x) (((x)+(sizeof(__u64)<<3)-1) / (sizeof(__u64)<<3))
21055 +#define PFM_PMD_BV PFM_BVSIZE(PFM_MAX_PMDS)
21056 +#define PFM_PMC_BV PFM_BVSIZE(PFM_MAX_PMCS)
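
For illustration only (not part of the patch): these bitvectors are plain arrays of u64 words, so user code addresses PMD/PMC index n with the usual divide/modulo-by-64 arithmetic, for example when filling reg_smpl_pmds[] or reg_reset_pmds[] in struct pfarg_pmd further down.

static inline void pfm_bv_set(__u64 *bv, unsigned int n)
{
	bv[n >> 6] |= 1ULL << (n & 63);	/* word n/64, bit n%64 */
}

static inline int pfm_bv_isset(const __u64 *bv, unsigned int n)
{
	return (bv[n >> 6] >> (n & 63)) & 1;
}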
21059 + * register flags layout:
21060 + * bit[00-15] : generic flags
21061 + * bit[16-31] : arch-specific flags
21063 + * PFM_REGFL_NO_EMUL64: must be set on the PMC controlling the PMD
21065 +#define PFM_REGFL_OVFL_NOTIFY 0x1 /* PMD: send notification on event */
21066 +#define PFM_REGFL_RANDOM 0x2 /* PMD: randomize value after event */
21067 +#define PFM_REGFL_NO_EMUL64 0x4 /* PMC: no 64-bit emulation */
21070 + * event set flags layout:
21071 + * bits[00-15] : generic flags
21072 + * bits[16-31] : arch-specific flags (see asm/perfmon.h)
21074 +#define PFM_SETFL_OVFL_SWITCH 0x01 /* enable switch on overflow */
21075 +#define PFM_SETFL_TIME_SWITCH 0x02 /* enable switch on timeout */
21078 + * argument to pfm_create_context() system call
21079 + * structure shared with user level
21081 +struct pfarg_ctx {
21082 + __u32 ctx_flags; /* noblock/block/syswide */
21083 + __u32 ctx_reserved1; /* for future use */
21084 + __u64 ctx_reserved2[7]; /* for future use */
21088 + * context flags layout:
21089 + * bits[00-15]: generic flags
21090 + * bits[16-31]: arch-specific flags (see perfmon_const.h)
21092 +#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user notifications */
21093 +#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
21094 +#define PFM_FL_OVFL_NO_MSG 0x80 /* no overflow msgs */
21097 + * argument to pfm_write_pmcs() system call.
21098 + * structure shared with user level
21100 +struct pfarg_pmc {
21101 + __u16 reg_num; /* which register */
21102 + __u16 reg_set; /* event set for this register */
21103 + __u32 reg_flags; /* REGFL flags */
21104 + __u64 reg_value; /* pmc value */
21105 + __u64 reg_reserved2[4]; /* for future use */
21109 + * argument to pfm_write_pmds() and pfm_read_pmds() system calls.
21110 + * structure shared with user level
21112 +struct pfarg_pmd {
21113 + __u16 reg_num; /* which register */
21114 + __u16 reg_set; /* event set for this register */
21115 + __u32 reg_flags; /* REGFL flags */
21116 + __u64 reg_value; /* initial pmc/pmd value */
21117 + __u64 reg_long_reset; /* value to reload after notification */
21118 + __u64 reg_short_reset; /* reset after counter overflow */
21119 + __u64 reg_last_reset_val; /* return: PMD last reset value */
21120 + __u64 reg_ovfl_switch_cnt; /* #overflows before switch */
21121 + __u64 reg_reset_pmds[PFM_PMD_BV]; /* reset on overflow */
21122 + __u64 reg_smpl_pmds[PFM_PMD_BV]; /* record in sample */
21123 + __u64 reg_smpl_eventid; /* opaque event identifier */
21124 + __u64 reg_random_mask; /* bitmask used to limit random value */
21125 + __u32 reg_random_seed; /* seed for randomization (OBSOLETE) */
21126 + __u32 reg_reserved2[7]; /* for future use */
21130 + * optional argument to pfm_start() system call. Pass NULL if not needed.
21131 + * structure shared with user level
21133 +struct pfarg_start {
21134 + __u16 start_set; /* event set to start with */
21135 + __u16 start_reserved1; /* for future use */
21136 + __u32 start_reserved2; /* for future use */
21137 + __u64 reserved3[3]; /* for future use */
21141 + * argument to pfm_load_context() system call.
21142 + * structure shared with user level
21144 +struct pfarg_load {
21145 + __u32 load_pid; /* thread or CPU to attach to */
21146 + __u16 load_set; /* set to load first */
21147 + __u16 load_reserved1; /* for future use */
21148 + __u64 load_reserved2[3]; /* for future use */
21152 + * argument to pfm_create_evtsets() and pfm_delete_evtsets() system calls.
21153 + * structure shared with user level.
21155 +struct pfarg_setdesc {
21156 + __u16 set_id; /* which set */
21157 + __u16 set_reserved1; /* for future use */
21158 + __u32 set_flags; /* SETFL flags */
21159 + __u64 set_timeout; /* switch timeout in nsecs */
21160 + __u64 reserved[6]; /* for future use */
21164 + * argument to pfm_getinfo_evtsets() system call.
21165 + * structure shared with user level
21167 +struct pfarg_setinfo {
21168 + __u16 set_id; /* which set */
21169 + __u16 set_reserved1; /* for future use */
21170 + __u32 set_flags; /* out: SETFL flags */
21171 + __u64 set_ovfl_pmds[PFM_PMD_BV]; /* out: last ovfl PMDs */
21172 + __u64 set_runs; /* out: #times the set was active */
21173 + __u64 set_timeout; /* out: eff/leftover timeout (nsecs) */
21174 + __u64 set_act_duration; /* out: time set was active in nsecs */
21175 + __u64 set_avail_pmcs[PFM_PMC_BV];/* out: available PMCs */
21176 + __u64 set_avail_pmds[PFM_PMD_BV];/* out: available PMDs */
21177 + __u64 set_reserved3[6]; /* for future use */
21181 + * default value for the user and group security parameters in
21182 + * /sys/kernel/perfmon/sys_group
21183 + * /sys/kernel/perfmon/task_group
21185 +#define PFM_GROUP_PERM_ANY -1 /* any user/group */
21188 + * overflow notification message.
21189 + * structure shared with user level
21191 +struct pfarg_ovfl_msg {
21192 + __u32 msg_type; /* message type: PFM_MSG_OVFL */
21193 + __u32 msg_ovfl_pid; /* process id */
21194 + __u16 msg_active_set; /* active set at overflow */
21195 + __u16 msg_ovfl_cpu; /* cpu of PMU interrupt */
21196 + __u32 msg_ovfl_tid; /* thread id */
21197 + __u64 msg_ovfl_ip; /* IP on PMU intr */
21198 + __u64 msg_ovfl_pmds[PFM_PMD_BV];/* overflowed PMDs */
21201 +#define PFM_MSG_OVFL 1 /* an overflow happened */
21202 +#define PFM_MSG_END 2 /* task to which context was attached ended */
21205 + * generic notification message (union).
21206 + * union shared with user level
21210 + struct pfarg_ovfl_msg pfm_ovfl_msg;
21214 + * perfmon version number
21216 +#define PFM_VERSION_MAJ 2U
21217 +#define PFM_VERSION_MIN 82U
21218 +#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|\
21219 + (PFM_VERSION_MIN & 0xffff))
21220 +#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
21221 +#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
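
For illustration only (not part of the patch): an application built against this header can use the macros above to verify that the interface revision reported by the running kernel is compatible, comparing major numbers only.

static inline int pfm_version_compatible(__u32 kernel_version)
{
	/* minor revisions are expected to remain backward compatible */
	return PFM_VERSION_MAJOR(kernel_version) == PFM_VERSION_MAJ;
}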
21223 +#endif /* __LINUX_PERFMON_H__ */
21224 diff --git a/include/linux/perfmon_dfl_smpl.h b/include/linux/perfmon_dfl_smpl.h
21225 new file mode 100644
21226 index 0000000..e0817a8
21228 +++ b/include/linux/perfmon_dfl_smpl.h
21231 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
21232 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21234 + * This file implements the new dfl sampling buffer format
21235 + * for the perfmon2 subsystem.
21237 + * This program is free software; you can redistribute it and/or
21238 + * modify it under the terms of version 2 of the GNU General Public
21239 + * License as published by the Free Software Foundation.
21241 + * This program is distributed in the hope that it will be useful,
21242 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21243 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21244 + * General Public License for more details.
21246 + * You should have received a copy of the GNU General Public License
21247 + * along with this program; if not, write to the Free Software
21248 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21251 +#ifndef __PERFMON_DFL_SMPL_H__
21252 +#define __PERFMON_DFL_SMPL_H__ 1
21255 + * format specific parameters (passed at context creation)
21257 +struct pfm_dfl_smpl_arg {
21258 + __u64 buf_size; /* size of the buffer in bytes */
21259 + __u32 buf_flags; /* buffer specific flags */
21260 + __u32 reserved1; /* for future use */
21261 + __u64 reserved[6]; /* for future use */
21265 + * This header is at the beginning of the sampling buffer returned to the user.
21266 + * It is directly followed by the first record.
21268 +struct pfm_dfl_smpl_hdr {
21269 + __u64 hdr_count; /* how many valid entries */
21270 + __u64 hdr_cur_offs; /* current offset from top of buffer */
21271 + __u64 hdr_overflows; /* #overflows for buffer */
21272 + __u64 hdr_buf_size; /* bytes in the buffer */
21273 + __u64 hdr_min_buf_space;/* minimal buffer size (internal use) */
21274 + __u32 hdr_version; /* smpl format version */
21275 + __u32 hdr_buf_flags; /* copy of buf_flags */
21276 + __u64 hdr_reserved[10]; /* for future use */
21280 + * Entry header in the sampling buffer. The header is directly followed
21281 + * with the values of the PMD registers of interest saved in increasing
21282 + * index order: PMD4, PMD5, and so on. How many PMDs are present depends
21283 + * on how the session was programmed.
21285 + * In the case where multiple counters overflow at the same time, multiple
21286 + * entries are written consecutively.
21288 + * The last_reset_val member indicates the initial value of the overflowed PMD.
21290 +struct pfm_dfl_smpl_entry {
21291 + __u32 pid; /* thread id (for NPTL, this is gettid()) */
21292 + __u16 ovfl_pmd; /* index of overflowed PMD for this sample */
21293 + __u16 reserved; /* for future use */
21294 + __u64 last_reset_val; /* initial value of overflowed PMD */
21295 + __u64 ip; /* where the overflow interrupt happened */
21296 + __u64 tstamp; /* overflow timestamp */
21297 + __u16 cpu; /* cpu on which the overflow occurred */
21298 + __u16 set; /* event set active when overflow occurred */
21299 + __u32 tgid; /* thread group id (getpid() for NPTL) */
21302 +#define PFM_DFL_SMPL_VERSION_MAJ 1U
21303 +#define PFM_DFL_SMPL_VERSION_MIN 0U
21304 +#define PFM_DFL_SMPL_VERSION (((PFM_DFL_SMPL_VERSION_MAJ&0xffff)<<16)|\
21305 + (PFM_DFL_SMPL_VERSION_MIN & 0xffff))
21307 +#endif /* __PERFMON_DFL_SMPL_H__ */
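
For illustration only (not part of the patch): a user-space sketch that walks the default-format buffer after a notification, assuming no extra PMD values were requested via reg_smpl_pmds, so that entries sit back-to-back with no payload after each record.

#include <stdio.h>

static void pfm_dfl_dump(struct pfm_dfl_smpl_hdr *hdr)
{
	struct pfm_dfl_smpl_entry *ent = (void *)(hdr + 1);
	__u64 i;

	for (i = 0; i < hdr->hdr_count; i++, ent++)
		printf("entry %llu: ip=0x%llx pmd%u cpu%u\n",
		       (unsigned long long)i,
		       (unsigned long long)ent->ip,
		       ent->ovfl_pmd, ent->cpu);
}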
21308 diff --git a/include/linux/perfmon_fmt.h b/include/linux/perfmon_fmt.h
21309 new file mode 100644
21310 index 0000000..82a6a90
21312 +++ b/include/linux/perfmon_fmt.h
21315 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
21316 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21318 + * Interface for custom sampling buffer format modules
21320 + * This program is free software; you can redistribute it and/or
21321 + * modify it under the terms of version 2 of the GNU General Public
21322 + * License as published by the Free Software Foundation.
21324 + * This program is distributed in the hope that it will be useful,
21325 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21326 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21327 + * General Public License for more details.
21329 + * You should have received a copy of the GNU General Public License
21330 + * along with this program; if not, write to the Free Software
21331 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21334 +#ifndef __PERFMON_FMT_H__
21335 +#define __PERFMON_FMT_H__ 1
21337 +#include <linux/kobject.h>
21339 +typedef int (*fmt_validate_t)(u32 flags, u16 npmds, void *arg);
21340 +typedef int (*fmt_getsize_t)(u32 flags, void *arg, size_t *size);
21341 +typedef int (*fmt_init_t)(struct pfm_context *ctx, void *buf, u32 flags,
21342 + u16 npmds, void *arg);
21343 +typedef int (*fmt_restart_t)(int is_active, u32 *ovfl_ctrl, void *buf);
21344 +typedef int (*fmt_exit_t)(void *buf);
21345 +typedef int (*fmt_handler_t)(struct pfm_context *ctx,
21346 + unsigned long ip, u64 stamp, void *data);
21348 +struct pfm_smpl_fmt {
21349 + char *fmt_name; /* name of the format (required) */
21350 + size_t fmt_arg_size; /* size of fmt args for ctx create */
21351 + u32 fmt_flags; /* format specific flags */
21352 + u32 fmt_version; /* format version number */
21354 + fmt_validate_t fmt_validate; /* validate context flags */
21355 + fmt_getsize_t fmt_getsize; /* get size for sampling buffer */
21356 + fmt_init_t fmt_init; /* initialize buffer area */
21357 + fmt_handler_t fmt_handler; /* overflow handler (required) */
21358 + fmt_restart_t fmt_restart; /* restart after notification */
21359 + fmt_exit_t fmt_exit; /* context termination */
21361 + struct list_head fmt_list; /* internal use only */
21363 + struct kobject kobj; /* sysfs internal use only */
21364 + struct module *owner; /* pointer to module owner */
21365 + u32 fmt_qdepth; /* Max notify queue depth (required) */
21367 +#define to_smpl_fmt(n) container_of(n, struct pfm_smpl_fmt, kobj)
21369 +#define PFM_FMTFL_IS_BUILTIN 0x1 /* fmt is compiled in */
21371 + * we need to know whether the format is builtin or compiled
21375 +#define PFM_FMT_BUILTIN_FLAG 0 /* not built as a module */
21377 +#define PFM_FMT_BUILTIN_FLAG PFM_FMTFL_IS_BUILTIN /* built as a module */
21380 +int pfm_fmt_register(struct pfm_smpl_fmt *fmt);
21381 +int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt);
21382 +void pfm_sysfs_builtin_fmt_add(void);
21384 +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt);
21385 +void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt);
21387 +#endif /* __PERFMON_FMT_H__ */
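
For illustration only (not part of the patch): the skeleton of a sampling format module built on this interface. The handler below drops every sample; a real format would append a record to the buffer prepared by its fmt_init callback. The queue depth value is an assumption borrowed from PFM_MSGS_COUNT in perfmon_kern.h.

#include <linux/module.h>
#include <linux/perfmon_kern.h>

static int demo_fmt_handler(struct pfm_context *ctx,
			    unsigned long ip, u64 stamp, void *data)
{
	return 0;	/* sample consumed, nothing recorded */
}

static struct pfm_smpl_fmt demo_fmt = {
	.fmt_name    = "demo",
	.fmt_handler = demo_fmt_handler,	/* required */
	.fmt_qdepth  = PFM_MSGS_COUNT,		/* required: max notify queue depth */
	.fmt_flags   = PFM_FMT_BUILTIN_FLAG,
	.owner       = THIS_MODULE,
};

static int __init demo_fmt_init(void)
{
	return pfm_fmt_register(&demo_fmt);
}

static void __exit demo_fmt_exit(void)
{
	pfm_fmt_unregister(&demo_fmt);
}

module_init(demo_fmt_init);
module_exit(demo_fmt_exit);
MODULE_LICENSE("GPL");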
21388 diff --git a/include/linux/perfmon_kern.h b/include/linux/perfmon_kern.h
21389 new file mode 100644
21390 index 0000000..6c3b527
21392 +++ b/include/linux/perfmon_kern.h
21395 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
21396 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21398 + * This program is free software; you can redistribute it and/or
21399 + * modify it under the terms of version 2 of the GNU General Public
21400 + * License as published by the Free Software Foundation.
21402 + * This program is distributed in the hope that it will be useful,
21403 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21404 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21405 + * General Public License for more details.
21407 + * You should have received a copy of the GNU General Public License
21408 + * along with this program; if not, write to the Free Software
21409 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21413 +#ifndef __LINUX_PERFMON_KERN_H__
21414 +#define __LINUX_PERFMON_KERN_H__
21416 + * This file contains all the definitions of data structures, variables, macros
21417 + * that are to be shared between generic code and arch-specific code
21419 + * For generic only definitions, use perfmon/perfmon_priv.h
21421 +#ifdef CONFIG_PERFMON
21423 +#include <linux/file.h>
21424 +#include <linux/sched.h>
21425 +#include <linux/perfmon.h>
21428 + * system administrator configuration controls available via
21429 + * the /sys/kernel/perfmon interface
21431 +struct pfm_controls {
21432 + u32 debug; /* debugging control bitmask */
21433 + gid_t sys_group; /* gid to create a syswide context */
21434 + gid_t task_group; /* gid to create a per-task context */
21435 + u32 flags; /* control flags (see below) */
21436 + size_t arg_mem_max; /* maximum vector argument size */
21437 + size_t smpl_buffer_mem_max; /* max buf mem, -1 for infinity */
21439 +extern struct pfm_controls pfm_controls;
21444 +#define PFM_CTRL_FL_RW_EXPERT 0x1 /* bypass reserved fields on read/write */
21450 + u64 value; /* 64-bit value */
21451 + u64 lval; /* last reset value */
21452 + u64 ovflsw_thres; /* #ovfls left before switch */
21453 + u64 long_reset; /* long reset value on overflow */
21454 + u64 short_reset; /* short reset value on overflow */
21455 + u64 reset_pmds[PFM_PMD_BV]; /* pmds to reset on overflow */
21456 + u64 smpl_pmds[PFM_PMD_BV]; /* pmds to record on overflow */
21457 + u64 mask; /* range mask for random value */
21458 + u64 ovflsw_ref_thres; /* #ovfls before next set */
21459 + u64 eventid; /* opaque event identifier */
21460 + u32 flags; /* notify/do not notify */
21464 + * event_set: encapsulates the full PMU state
21466 +struct pfm_event_set {
21467 + struct list_head list; /* ordered chain of sets */
21468 + u16 id; /* set identification */
21469 + u16 nused_pmds; /* max number of used PMDs */
21470 + u16 nused_pmcs; /* max number of used PMCs */
21471 + u16 pad1; /* padding */
21472 + u32 flags; /* public flags */
21473 + u32 priv_flags; /* private flags (see below) */
21474 + u64 runs; /* # of activations */
21475 + u32 npend_ovfls; /* number of pending PMD overflow */
21476 + u32 pad2; /* padding */
21477 + u64 used_pmds[PFM_PMD_BV]; /* used PMDs */
21478 + u64 povfl_pmds[PFM_PMD_BV]; /* pending overflowed PMDs */
21479 + u64 ovfl_pmds[PFM_PMD_BV]; /* last overflowed PMDs */
21480 + u64 reset_pmds[PFM_PMD_BV]; /* PMDs to reset after overflow */
21481 + u64 ovfl_notify[PFM_PMD_BV]; /* notify on overflow */
21482 + u64 used_pmcs[PFM_PMC_BV]; /* used PMCs */
21483 + u64 pmcs[PFM_MAX_PMCS]; /* PMC values */
21485 + struct pfm_pmd pmds[PFM_MAX_PMDS];
21487 + ktime_t hrtimer_exp; /* switch timeout reference */
21488 + ktime_t hrtimer_rem; /* per-thread remainder timeout */
21490 + u64 duration_start; /* start time in ns */
21491 + u64 duration; /* total active ns */
21495 + * common private event set flags (priv_flags)
21497 + * upper 16 bits: for arch-specific use
21498 + * lower 16 bits: for common use
21500 +#define PFM_SETFL_PRIV_MOD_PMDS 0x1 /* PMD register(s) modified */
21501 +#define PFM_SETFL_PRIV_MOD_PMCS 0x2 /* PMC register(s) modified */
21502 +#define PFM_SETFL_PRIV_SWITCH 0x4 /* must switch set on restart */
21503 +#define PFM_SETFL_PRIV_MOD_BOTH (PFM_SETFL_PRIV_MOD_PMDS \
21504 + | PFM_SETFL_PRIV_MOD_PMCS)
21509 +struct pfm_context_flags {
21510 + unsigned int block:1; /* task blocks on user notifications */
21511 + unsigned int system:1; /* do system wide monitoring */
21512 + unsigned int no_msg:1; /* no message sent on overflow */
21513 + unsigned int switch_ovfl:1; /* switch set on counter ovfl */
21514 + unsigned int switch_time:1; /* switch set on timeout */
21515 + unsigned int started:1; /* pfm_start() issued */
21516 + unsigned int work_type:2; /* type of work for pfm_handle_work */
21517 + unsigned int mmap_nlock:1; /* no lock in pfm_release_buf_space */
21518 + unsigned int ia64_v20_compat:1; /* context is IA-64 v2.0 mode */
21519 + unsigned int can_restart:8; /* allowed to issue a PFM_RESTART */
21520 + unsigned int reset_count:8; /* number of pending resets */
21521 + unsigned int is_self:1; /* per-thread and self-monitoring */
21522 + unsigned int reserved:5; /* for future use */
21526 + * values for work_type (TIF_PERFMON_WORK must be set)
21528 +#define PFM_WORK_NONE 0 /* nothing to do */
21529 +#define PFM_WORK_RESET 1 /* reset overflowed counters */
21530 +#define PFM_WORK_BLOCK 2 /* block current thread */
21531 +#define PFM_WORK_ZOMBIE 3 /* cleanup zombie context */
21534 + * overflow description argument passed to sampling format
21536 +struct pfm_ovfl_arg {
21537 + u16 ovfl_pmd; /* index of overflowed PMD */
21538 + u16 active_set; /* set active at the time of the overflow */
21539 + u32 ovfl_ctrl; /* control flags */
21540 + u64 pmd_last_reset; /* last reset value of overflowed PMD */
21541 + u64 smpl_pmds_values[PFM_MAX_PMDS]; /* values of other PMDs */
21542 + u64 pmd_eventid; /* eventid associated with PMD */
21543 + u16 num_smpl_pmds; /* number of PMDs in smpl_pmds_values */
21546 + * depth of message queue
21548 + * Depth cannot be bigger than 255 (see reset_count)
21550 +#define PFM_MSGS_ORDER 3 /* log2(number of messages) */
21551 +#define PFM_MSGS_COUNT (1<<PFM_MSGS_ORDER) /* number of messages */
21552 +#define PFM_MSGQ_MASK (PFM_MSGS_COUNT-1)
21555 + * perfmon context state
21557 +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
21558 +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
21559 +#define PFM_CTX_MASKED 3 /* context is loaded, monitoring is masked */
21560 +#define PFM_CTX_ZOMBIE 4 /* context lost owner but still attached */
21563 + * registers description
21565 +struct pfm_regdesc {
21566 + u64 pmcs[PFM_PMC_BV]; /* available PMC */
21567 + u64 pmds[PFM_PMD_BV]; /* available PMD */
21568 + u64 rw_pmds[PFM_PMD_BV]; /* available RW PMD */
21569 + u64 intr_pmds[PFM_PMD_BV]; /* PMD generating intr */
21570 + u64 cnt_pmds[PFM_PMD_BV]; /* PMD counters */
21571 + u16 max_pmc; /* highest+1 avail PMC */
21572 + u16 max_pmd; /* highest+1 avail PMD */
21573 + u16 max_rw_pmd; /* highest+1 avail RW PMD */
21574 + u16 first_intr_pmd; /* first intr PMD */
21575 + u16 max_intr_pmd; /* highest+1 intr PMD */
21576 + u16 num_rw_pmd; /* number of avail RW PMD */
21577 + u16 num_pmcs; /* number of logical PMCS */
21578 + u16 num_pmds; /* number of logical PMDS */
21579 + u16 num_counters; /* number of counting PMD */
21583 + * context: contains all the state of a session
21585 +struct pfm_context {
21586 + spinlock_t lock; /* context protection */
21588 + struct pfm_context_flags flags;
21589 + u32 state; /* current state */
21590 + struct task_struct *task; /* attached task */
21592 + struct completion restart_complete;/* block on notification */
21593 + u64 last_act; /* last activation */
21594 + u32 last_cpu; /* last CPU used (SMP only) */
21595 + u32 cpu; /* cpu bound to context */
21597 + struct pfm_smpl_fmt *smpl_fmt; /* sampling format callbacks */
21598 + void *smpl_addr; /* user smpl buffer base */
21599 + size_t smpl_size; /* user smpl buffer size */
21600 + void *smpl_real_addr;/* actual smpl buffer base */
21601 + size_t smpl_real_size; /* actual smpl buffer size */
21603 + wait_queue_head_t msgq_wait; /* pfm_read() wait queue */
21605 + union pfarg_msg msgq[PFM_MSGS_COUNT];
21609 + struct fasync_struct *async_queue; /* async notification */
21611 + struct pfm_event_set *active_set; /* active set */
21612 + struct list_head set_list; /* ordered list of sets */
21614 + struct pfm_regdesc regs; /* registers available to context */
21617 + * save stack space by allocating temporary variables for
21618 + * pfm_overflow_handler() in pfm_context
21620 + struct pfm_ovfl_arg ovfl_arg;
21621 + u64 tmp_ovfl_notify[PFM_PMD_BV];
21625 + * ovfl_ctrl bitmask (used by interrupt handler)
21627 +#define PFM_OVFL_CTRL_NOTIFY 0x1 /* notify user */
21628 +#define PFM_OVFL_CTRL_RESET 0x2 /* reset overflowed pmds */
21629 +#define PFM_OVFL_CTRL_MASK 0x4 /* mask monitoring */
21630 +#define PFM_OVFL_CTRL_SWITCH 0x8 /* switch sets */
21635 +#define PFM_ERR(f, x...) printk(KERN_ERR "perfmon: " f "\n", ## x)
21636 +#define PFM_WARN(f, x...) printk(KERN_WARNING "perfmon: " f "\n", ## x)
21637 +#define PFM_LOG(f, x...) printk(KERN_NOTICE "perfmon: " f "\n", ## x)
21638 +#define PFM_INFO(f, x...) printk(KERN_INFO "perfmon: " f "\n", ## x)
21643 + * Printk rate limiting is enforced to avoid getting flooded with too many
21644 + * error messages on the console (which could render the machine unresponsive).
21645 + * To get full debug output (turn off ratelimit):
21646 + * $ echo 0 >/proc/sys/kernel/printk_ratelimit
21648 + * debug is a bitmask where bits are defined as follows:
21649 + * bit 0: enable non-interrupt code debug messages
21650 + * bit 1: enable interrupt code debug messages
21652 +#ifdef CONFIG_PERFMON_DEBUG
21653 +#define _PFM_DBG(lm, f, x...) \
21655 + if (unlikely((pfm_controls.debug & lm) && printk_ratelimit())) { \
21656 + preempt_disable(); \
21657 + printk("perfmon: %s.%d: CPU%d [%d]: " f "\n", \
21658 + __func__, __LINE__, \
21659 + smp_processor_id(), current->pid , ## x); \
21660 + preempt_enable(); \
21664 +#define PFM_DBG(f, x...) _PFM_DBG(0x1, f, ##x)
21665 +#define PFM_DBG_ovfl(f, x...) _PFM_DBG(0x2, f, ## x)
21667 +#define PFM_DBG(f, x...) do {} while (0)
21668 +#define PFM_DBG_ovfl(f, x...) do {} while (0)
21671 +extern struct pfm_pmu_config *pfm_pmu_conf;
21672 +extern int perfmon_disabled;
21674 +static inline struct pfm_arch_context *pfm_ctx_arch(struct pfm_context *c)
21676 + return (struct pfm_arch_context *)(c+1);
21679 +int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
21680 + void **req, void **to_free);
21682 +int pfm_get_smpl_arg(char __user *fmt_uname, void __user *uaddr, size_t usize,
21683 + void **arg, struct pfm_smpl_fmt **fmt);
21685 +int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req,
21687 +int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
21689 +int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count);
21691 +int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *req,
21692 + struct task_struct *task);
21693 +int __pfm_unload_context(struct pfm_context *ctx, int *can_release);
21695 +int __pfm_stop(struct pfm_context *ctx, int *release_info);
21696 +int __pfm_restart(struct pfm_context *ctx, int *unblock);
21697 +int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start);
21699 +void pfm_free_context(struct pfm_context *ctx);
21701 +void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size);
21703 +int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
21704 + unsigned long *flags, void **resume);
21706 + * check_mask bitmask values for pfm_check_task_state()
21708 +#define PFM_CMD_STOPPED 0x01 /* command needs thread stopped */
21709 +#define PFM_CMD_UNLOADED 0x02 /* command needs ctx unloaded */
21710 +#define PFM_CMD_UNLOAD 0x04 /* command is unload */
21712 +int __pfm_create_context(struct pfarg_ctx *req,
21713 + struct pfm_smpl_fmt *fmt,
21716 + struct pfm_context **new_ctx);
21718 +struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id,
21721 +int pfm_pmu_conf_get(int autoload);
21722 +void pfm_pmu_conf_put(void);
21724 +int pfm_session_allcpus_acquire(void);
21725 +void pfm_session_allcpus_release(void);
21727 +int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize);
21728 +void pfm_smpl_buf_free(struct pfm_context *ctx);
21730 +struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name);
21731 +void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt);
21733 +void pfm_interrupt_handler(unsigned long iip, struct pt_regs *regs);
21735 +void pfm_resume_task(struct task_struct *t, void *data);
21737 +#include <linux/perfmon_pmu.h>
21738 +#include <linux/perfmon_fmt.h>
21740 +extern const struct file_operations pfm_file_ops;
21742 + * upper limit for count in calls that take vector arguments. This is used
21743 + * to prevent multiplication overflow when we compute the actual storage size
21745 +#define PFM_MAX_ARG_COUNT(m) (INT_MAX/sizeof(*(m)))
21747 +#define cast_ulp(_x) ((unsigned long *)_x)
21749 +#define PFM_NORMAL 0
21750 +#define PFM_COMPAT 1
21752 +void __pfm_exit_thread(void);
21753 +void pfm_ctxsw_in(struct task_struct *prev, struct task_struct *next);
21754 +void pfm_ctxsw_out(struct task_struct *prev, struct task_struct *next);
21755 +void pfm_handle_work(struct pt_regs *regs);
21756 +void __pfm_init_percpu(void *dummy);
21757 +void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
21759 +static inline void pfm_exit_thread(void)
21761 + if (current->pfm_context)
21762 + __pfm_exit_thread();
21766 + * include arch-specific kernel level definitions
21768 +#include <asm/perfmon_kern.h>
21770 +static inline void pfm_copy_thread(struct task_struct *task)
21773 + * context or perfmon TIF state is NEVER inherited
21774 + * in child task. Holds for per-thread and system-wide
21776 + task->pfm_context = NULL;
21777 + clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
21778 + clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
21779 + pfm_arch_disarm_handle_work(task);
21784 + * read a single PMD register.
21786 + * virtual PMD registers have a special handler.
21787 + * Depends on definitions in asm/perfmon_kern.h
21789 +static inline u64 pfm_read_pmd(struct pfm_context *ctx, unsigned int cnum)
21791 + if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V))
21792 + return pfm_pmu_conf->pmd_sread(ctx, cnum);
21794 + return pfm_arch_read_pmd(ctx, cnum);
21797 + * write a single PMD register.
21799 + * virtual PMD registers have a special handler.
21800 + * Depends on definitions in asm/perfmon_kern.h
21802 +static inline void pfm_write_pmd(struct pfm_context *ctx, unsigned int cnum,
21806 + * PMD writes are ignored for read-only registers
21808 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_RO)
21811 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V) {
21812 + pfm_pmu_conf->pmd_swrite(ctx, cnum, value);
21816 + * clear unimplemented bits
21818 + value &= ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
21820 + pfm_arch_write_pmd(ctx, cnum, value);
21823 +void __pfm_init_percpu(void *dummy);
21825 +static inline void pfm_init_percpu(void)
21827 + __pfm_init_percpu(NULL);
21831 + * pfm statistics are available via debugfs,
21832 + * under the perfmon subdirectory.
21834 + * When adding/removing new stats, make sure you also
21835 + * update the name table in perfmon_debugfs.c
21837 +enum pfm_stats_names {
21838 + PFM_ST_ovfl_intr_all_count = 0,
21839 + PFM_ST_ovfl_intr_ns,
21840 + PFM_ST_ovfl_intr_spurious_count,
21841 + PFM_ST_ovfl_intr_replay_count,
21842 + PFM_ST_ovfl_intr_regular_count,
21843 + PFM_ST_handle_work_count,
21844 + PFM_ST_ovfl_notify_count,
21845 + PFM_ST_reset_pmds_count,
21846 + PFM_ST_pfm_restart_count,
21847 + PFM_ST_fmt_handler_calls,
21848 + PFM_ST_fmt_handler_ns,
21849 + PFM_ST_set_switch_count,
21850 + PFM_ST_set_switch_ns,
21851 + PFM_ST_set_switch_exp,
21852 + PFM_ST_ctxswin_count,
21853 + PFM_ST_ctxswin_ns,
21854 + PFM_ST_handle_timeout_count,
21855 + PFM_ST_ovfl_intr_nmi_count,
21856 + PFM_ST_ctxswout_count,
21857 + PFM_ST_ctxswout_ns,
21858 + PFM_ST_LAST /* last entry marker */
21860 +#define PFM_NUM_STATS PFM_ST_LAST
21862 +struct pfm_stats {
21863 + u64 v[PFM_NUM_STATS];
21864 + struct dentry *dirs[PFM_NUM_STATS];
21865 + struct dentry *cpu_dir;
21866 + char cpu_name[8];
21869 +#ifdef CONFIG_PERFMON_DEBUG_FS
21870 +#define pfm_stats_get(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]
21871 +#define pfm_stats_inc(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]++
21872 +#define pfm_stats_add(x, y) __get_cpu_var(pfm_stats).v[PFM_ST_##x] += (y)
21873 +void pfm_reset_stats(int cpu);
21875 +#define pfm_stats_get(x)
21876 +#define pfm_stats_inc(x)
21877 +#define pfm_stats_add(x, y)
21878 +static inline void pfm_reset_stats(int cpu)
21884 +DECLARE_PER_CPU(struct pfm_context *, pmu_ctx);
21885 +DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
21886 +DECLARE_PER_CPU(struct task_struct *, pmu_owner);
21888 +void pfm_cpu_disable(void);
21892 + * max vector argument elements for local storage (no kmalloc/kfree)
21893 + * The PFM_ARCH_PM*_ARG should be defined in perfmon_kern.h.
21894 + * If not, default (conservative) values are used
21896 +#ifndef PFM_ARCH_PMC_STK_ARG
21897 +#define PFM_ARCH_PMC_STK_ARG 1
21900 +#ifndef PFM_ARCH_PMD_STK_ARG
21901 +#define PFM_ARCH_PMD_STK_ARG 1
21904 +#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
21905 +#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
21907 +#else /* !CONFIG_PERFMON */
21911 + * perfmon hooks are nops when CONFIG_PERFMON is undefined
21913 +static inline void pfm_cpu_disable(void)
21916 +static inline void pfm_exit_thread(void)
21919 +static inline void pfm_handle_work(struct pt_regs *regs)
21922 +static inline void pfm_copy_thread(struct task_struct *t)
21925 +static inline void pfm_ctxsw_in(struct task_struct *p, struct task_struct *n)
21928 +static inline void pfm_ctxsw_out(struct task_struct *p, struct task_struct *n)
21931 +static inline void pfm_session_allcpus_release(void)
21934 +static inline int pfm_session_allcpus_acquire(void)
21939 +static inline void pfm_init_percpu(void)
21942 +#endif /* CONFIG_PERFMON */
21944 +#endif /* __LINUX_PERFMON_KERN_H__ */
21945 diff --git a/include/linux/perfmon_pmu.h b/include/linux/perfmon_pmu.h
21946 new file mode 100644
21947 index 0000000..3f5f9e8
21949 +++ b/include/linux/perfmon_pmu.h
21952 + * Copyright (c) 2006 Hewlett-Packard Development Company, L.P.
21953 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21955 + * Interface for PMU description modules
21957 + * This program is free software; you can redistribute it and/or
21958 + * modify it under the terms of version 2 of the GNU General Public
21959 + * License as published by the Free Software Foundation.
21961 + * This program is distributed in the hope that it will be useful,
21962 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21963 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21964 + * General Public License for more details.
21966 + * You should have received a copy of the GNU General Public License
21967 + * along with this program; if not, write to the Free Software
21968 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21971 +#ifndef __PERFMON_PMU_H__
21972 +#define __PERFMON_PMU_H__ 1
21975 + * generic information about a PMC or PMD register
21977 + * Dependency bitmasks:
21978 + * They are used to allow lazy save/restore in the context switch
21979 + * code and to avoid picking up stale configuration from a previous
21980 + * thread. Using the bitmask, the generic read/write routines can
21981 + * ensure that all registers needed to support the measurement are
21982 + * restored properly on context switch in.
21984 +struct pfm_regmap_desc {
21985 + u16 type; /* role of the register */
21986 + u16 reserved1; /* for future use */
21987 + u32 reserved2; /* for future use */
21988 + u64 dfl_val; /* power-on default value (quiescent) */
21989 + u64 rsvd_msk; /* reserved bits: 1 means reserved */
21990 + u64 no_emul64_msk; /* bits to clear for PFM_REGFL_NO_EMUL64 */
21991 + unsigned long hw_addr; /* HW register address or index */
21992 + struct kobject kobj; /* for internal use only */
21993 + char *desc; /* HW register description string */
21994 + u64 dep_pmcs[PFM_PMC_BV];/* depending PMC registers */
21996 +#define to_reg(n) container_of(n, struct pfm_regmap_desc, kobj)
21999 + * pfm_reg_desc helper macros
22001 +#define PMC_D(t, d, v, r, n, h) \
22006 + .no_emul64_msk = n, \
22010 +#define PMD_D(t, d, h) \
22014 + .no_emul64_msk = 0, \
22018 +#define PMD_DR(t, d, h, r) \
22022 + .no_emul64_msk = 0, \
22027 + { .type = PFM_REG_NA }
22029 +#define PMD_DP(t, d, h, p) \
22033 + .no_emul64_msk = 0, \
22034 + .dep_pmcs[0] = p, \
22039 + * type of a PMU register (16-bit bitmask) for use with pfm_reg_desc.type
22041 +#define PFM_REG_NA 0x00 /* not avail. (not impl.,no access) must be 0 */
22042 +#define PFM_REG_I 0x01 /* PMC/PMD: implemented */
22043 +#define PFM_REG_WC 0x02 /* PMC: has write_checker */
22044 +#define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */
22045 +#define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */
22046 +#define PFM_REG_V 0x10 /* PMD: virtual reg */
22047 +#define PFM_REG_INTR 0x20 /* PMD: register can generate interrupt */
22048 +#define PFM_REG_SYS 0x40 /* PMC/PMD: register is for system-wide only */
22049 +#define PFM_REG_THR 0x80 /* PMC/PMD: register is for per-thread only */
22050 +#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */
22053 + * define some shortcuts for common types
22055 +#define PFM_REG_W (PFM_REG_WC|PFM_REG_I)
22056 +#define PFM_REG_W64 (PFM_REG_WC|PFM_REG_NO64|PFM_REG_I)
22057 +#define PFM_REG_C (PFM_REG_C64|PFM_REG_INTR|PFM_REG_I)
22058 +#define PFM_REG_I64 (PFM_REG_NO64|PFM_REG_I)
22059 +#define PFM_REG_IRO (PFM_REG_I|PFM_REG_RO)
22061 +typedef int (*pfm_pmc_check_t)(struct pfm_context *ctx,
22062 + struct pfm_event_set *set,
22063 + struct pfarg_pmc *req);
22065 +typedef int (*pfm_pmd_check_t)(struct pfm_context *ctx,
22066 + struct pfm_event_set *set,
22067 + struct pfarg_pmd *req);
22070 +typedef u64 (*pfm_sread_t)(struct pfm_context *ctx, unsigned int cnum);
22071 +typedef void (*pfm_swrite_t)(struct pfm_context *ctx, unsigned int cnum, u64 val);
22074 + * structure used by pmu description modules
22076 + * probe_pmu() routine return value:
22077 + * - 1 means recognized PMU
22078 + * - 0 means not recognized PMU
22080 +struct pfm_pmu_config {
22081 + char *pmu_name; /* PMU family name */
22082 + char *version; /* config module version */
22084 + int counter_width; /* width of hardware counter */
22086 + struct pfm_regmap_desc *pmc_desc; /* PMC register descriptions */
22087 + struct pfm_regmap_desc *pmd_desc; /* PMD register descriptions */
22089 + pfm_pmc_check_t pmc_write_check;/* write checker (optional) */
22090 + pfm_pmd_check_t pmd_write_check;/* write checker (optional) */
22091 + pfm_pmd_check_t pmd_read_check; /* read checker (optional) */
22093 + pfm_sread_t pmd_sread; /* virtual pmd read */
22094 + pfm_swrite_t pmd_swrite; /* virtual pmd write */
22096 + int (*probe_pmu)(void);/* probe PMU routine */
22098 + u16 num_pmc_entries;/* #entries in pmc_desc */
22099 + u16 num_pmd_entries;/* #entries in pmd_desc */
22101 + void *pmu_info; /* model-specific infos */
22102 + u32 flags; /* set of flags */
22104 + struct module *owner; /* pointer to module struct */
22107 + * fields computed internally, do not set in module
22109 + struct pfm_regdesc regs_all; /* regs available to all */
22110 + struct pfm_regdesc regs_thr; /* regs avail per-thread */
22111 + struct pfm_regdesc regs_sys; /* regs avail system-wide */
22113 + u64 ovfl_mask; /* overflow mask */
22116 +static inline void *pfm_pmu_info(void)
22118 + return pfm_pmu_conf->pmu_info;
22122 + * pfm_pmu_config flags
22124 +#define PFM_PMUFL_IS_BUILTIN 0x1 /* pmu config is compiled in */
22127 + * we need to know whether the PMU description is builtin or compiled
22131 +#define PFM_PMU_BUILTIN_FLAG 0 /* not built as a module */
22133 +#define PFM_PMU_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* built as a module */
22136 +int pfm_pmu_register(struct pfm_pmu_config *cfg);
22137 +void pfm_pmu_unregister(struct pfm_pmu_config *cfg);
22139 +int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu);
22140 +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
22142 +#endif /* __PERFMON_PMU_H__ */
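
For illustration only (not part of the patch): the core of a PMU description module for a hypothetical PMU with a single event-select register and one 48-bit counter, registered through pfm_pmu_register(). The register names and hw_addr values are placeholders.

#include <linux/module.h>
#include <linux/perfmon_kern.h>

static struct pfm_regmap_desc demo_pmc_desc[] = {
	{ .type = PFM_REG_I64, .desc = "DEMO_EVTSEL0", .hw_addr = 0x186 },
};

static struct pfm_regmap_desc demo_pmd_desc[] = {
	{ .type = PFM_REG_C, .desc = "DEMO_CTR0", .hw_addr = 0xc1 },
};

static int demo_probe_pmu(void)
{
	return 1;	/* 1: PMU recognized, 0: not recognized */
}

static struct pfm_pmu_config demo_pmu_conf = {
	.pmu_name	 = "Demo PMU",
	.version	 = "0.1",
	.counter_width	 = 48,
	.pmc_desc	 = demo_pmc_desc,
	.pmd_desc	 = demo_pmd_desc,
	.num_pmc_entries = 1,
	.num_pmd_entries = 1,
	.probe_pmu	 = demo_probe_pmu,
	.flags		 = PFM_PMU_BUILTIN_FLAG,
	.owner		 = THIS_MODULE,
};

/* typically called from module or arch init code */
static int __init demo_pmu_init(void)
{
	return pfm_pmu_register(&demo_pmu_conf);
}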
22143 diff --git a/include/linux/sched.h b/include/linux/sched.h
22144 index 3d9120c..8fb3b55 100644
22145 --- a/include/linux/sched.h
22146 +++ b/include/linux/sched.h
22147 @@ -96,6 +96,7 @@ struct exec_domain;
22148 struct futex_pi_state;
22149 struct robust_list_head;
22151 +struct pfm_context;
22154 * List of flags we want to share for kernel threads,
22155 @@ -1301,6 +1302,9 @@ struct task_struct {
22156 int latency_record_count;
22157 struct latency_record latency_record[LT_SAVECOUNT];
22159 +#ifdef CONFIG_PERFMON
22160 + struct pfm_context *pfm_context;
22165 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
22166 index d6ff145..e308523 100644
22167 --- a/include/linux/syscalls.h
22168 +++ b/include/linux/syscalls.h
22169 @@ -29,6 +29,13 @@ struct msqid_ds;
22170 struct new_utsname;
22172 struct __old_kernel_stat;
22176 +struct pfarg_start;
22177 +struct pfarg_load;
22178 +struct pfarg_setinfo;
22179 +struct pfarg_setdesc;
22183 @@ -625,4 +632,27 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
22185 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
22187 +asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq,
22188 + void __user *uarg, size_t smpl_size);
22189 +asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq,
22191 +asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq,
22193 +asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq,
22195 +asmlinkage long sys_pfm_restart(int fd);
22196 +asmlinkage long sys_pfm_stop(int fd);
22197 +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq);
22198 +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq);
22199 +asmlinkage long sys_pfm_unload_context(int fd);
22200 +asmlinkage long sys_pfm_delete_evtsets(int fd,
22201 + struct pfarg_setinfo __user *ureq,
22203 +asmlinkage long sys_pfm_create_evtsets(int fd,
22204 + struct pfarg_setdesc __user *ureq,
22206 +asmlinkage long sys_pfm_getinfo_evtsets(int fd,
22207 + struct pfarg_setinfo __user *ureq,
22211 diff --git a/kernel/sched.c b/kernel/sched.c
22212 index ad1962d..1bc8fcf 100644
22213 --- a/kernel/sched.c
22214 +++ b/kernel/sched.c
22216 #include <linux/debugfs.h>
22217 #include <linux/ctype.h>
22218 #include <linux/ftrace.h>
22219 +#include <linux/perfmon_kern.h>
22221 #include <asm/tlb.h>
22222 #include <asm/irq_regs.h>
22223 diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
22224 index 08d6e1b..61f4155 100644
22225 --- a/kernel/sys_ni.c
22226 +++ b/kernel/sys_ni.c
22227 @@ -126,6 +126,19 @@ cond_syscall(sys_vm86);
22228 cond_syscall(compat_sys_ipc);
22229 cond_syscall(compat_sys_sysctl);
22231 +cond_syscall(sys_pfm_create_context);
22232 +cond_syscall(sys_pfm_write_pmcs);
22233 +cond_syscall(sys_pfm_write_pmds);
22234 +cond_syscall(sys_pfm_read_pmds);
22235 +cond_syscall(sys_pfm_restart);
22236 +cond_syscall(sys_pfm_start);
22237 +cond_syscall(sys_pfm_stop);
22238 +cond_syscall(sys_pfm_load_context);
22239 +cond_syscall(sys_pfm_unload_context);
22240 +cond_syscall(sys_pfm_create_evtsets);
22241 +cond_syscall(sys_pfm_delete_evtsets);
22242 +cond_syscall(sys_pfm_getinfo_evtsets);
22244 /* arch-specific weak syscall entries */
22245 cond_syscall(sys_pciconfig_read);
22246 cond_syscall(sys_pciconfig_write);
22247 diff --git a/perfmon/Makefile b/perfmon/Makefile
22248 new file mode 100644
22249 index 0000000..32ff037
22251 +++ b/perfmon/Makefile
22254 +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
22255 +# Contributed by Stephane Eranian <eranian@hpl.hp.com>
22257 +obj-y = perfmon_init.o perfmon_rw.o perfmon_res.o \
22258 + perfmon_pmu.o perfmon_sysfs.o perfmon_syscalls.o \
22259 + perfmon_file.o perfmon_ctxsw.o perfmon_intr.o \
22260 + perfmon_dfl_smpl.o perfmon_sets.o perfmon_hotplug.o \
22261 + perfmon_msg.o perfmon_smpl.o perfmon_attach.o \
22262 + perfmon_activate.o perfmon_ctx.o perfmon_fmt.o
22264 +obj-$(CONFIG_PERFMON_DEBUG_FS) += perfmon_debugfs.o
22265 diff --git a/perfmon/perfmon_activate.c b/perfmon/perfmon_activate.c
22266 new file mode 100644
22267 index 0000000..d9f501d
22269 +++ b/perfmon/perfmon_activate.c
22272 + * perfmon_activate.c: perfmon2 start/stop functions
22274 + * This file implements the perfmon2 interface which
22275 + * provides access to the hardware performance counters
22276 + * of the host processor.
22279 + * The initial version of perfmon.c was written by
22280 + * Ganesh Venkitachalam, IBM Corp.
22282 + * Then it was modified for perfmon-1.x by Stephane Eranian and
22283 + * David Mosberger, Hewlett Packard Co.
22285 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
22286 + * by Stephane Eranian, Hewlett Packard Co.
22288 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
22289 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
22290 + * David Mosberger-Tang <davidm@hpl.hp.com>
22292 + * More information about perfmon available at:
22293 + * http://perfmon2.sf.net
22295 + * This program is free software; you can redistribute it and/or
22296 + * modify it under the terms of version 2 of the GNU General Public
22297 + * License as published by the Free Software Foundation.
22299 + * This program is distributed in the hope that it will be useful,
22300 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22301 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22302 + * General Public License for more details.
22304 + * You should have received a copy of the GNU General Public License
22305 + * along with this program; if not, write to the Free Software
22306 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22309 +#include <linux/kernel.h>
22310 +#include <linux/perfmon_kern.h>
22311 +#include "perfmon_priv.h"
22314 + * __pfm_start - activate monitoring
22315 + * @ctx: context to operate on
22316 + * @start: pfarg_start as passed by user
22318 + * When operating in per-thread mode and not self-monitoring, the monitored
22319 + * thread must be stopped. Activation will be effective next time the thread
22320 + * is context switched in.
22322 + * The pfarg_start argument is optional and may be used to designate
22323 + * the initial event set to activate. When not provided, the last active
22324 + * set is used. For the first activation, set0 is used when start is NULL.
22326 + * On some architectures, e.g., IA-64, it may be possible to start monitoring
22327 + * without calling this function under certain conditions (per-thread and self
22328 + * monitoring). In this case, either set0 or the last active set is used.
22330 + * the context is locked and interrupts are disabled.
22332 +int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start)
22334 + struct task_struct *task, *owner_task;
22335 + struct pfm_event_set *new_set, *old_set;
22338 + task = ctx->task;
22341 + * UNLOADED: error
22342 + * LOADED : normal start, nop if started unless set is different
22343 + * MASKED : nop or change set when unmasking
22344 + * ZOMBIE : cannot happen
22346 + if (ctx->state == PFM_CTX_UNLOADED)
22349 + old_set = new_set = ctx->active_set;
22352 + * always the case for system-wide
22354 + if (task == NULL)
22357 + is_self = task == current;
22360 + * argument is provided?
22364 + * find the set to load first
22366 + new_set = pfm_find_set(ctx, start->start_set, 0);
22367 + if (new_set == NULL) {
22368 + PFM_DBG("event set%u does not exist",
22369 + start->start_set);
22374 + PFM_DBG("cur_set=%u req_set=%u", old_set->id, new_set->id);
22377 + * if we need to change the active set we need
22378 + * to check if we can access the PMU
22380 + if (new_set != old_set) {
22382 + owner_task = __get_cpu_var(pmu_owner);
22384 + * system-wide: must run on the right CPU
22385 + * per-thread : must be the owner of the PMU context
22387 + * pfm_switch_sets() returns with monitoring stopped
22390 + pfm_switch_sets(ctx, new_set, PFM_PMD_RESET_LONG, 1);
22393 + * In a UP kernel, the PMU may contain the state
22394 + * of the task we want to operate on, yet the task
22395 + * may be switched out (lazy save). We need to save
22396 + * current state (old_set), switch active_set and
22397 + * mark it for reload.
22399 + if (owner_task == task)
22400 + pfm_save_pmds(ctx, old_set);
22401 + ctx->active_set = new_set;
22402 + new_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
22407 + * mark as started
22408 + * must be done before calling pfm_arch_start()
22410 + ctx->flags.started = 1;
22412 + pfm_arch_start(task, ctx);
22415 + * we check whether we had a pending ovfl before restarting.
22416 + * If so we need to regenerate the interrupt to make sure we
22417 + * keep recorded samples. For non-self monitoring this check
22418 + * is done in the pfm_ctxswin_thread() routine.
22420 + * we check new_set/old_set because pfm_switch_sets() already
22421 + * takes care of replaying the pending interrupts
22423 + if (is_self && new_set != old_set && new_set->npend_ovfls) {
22424 + pfm_arch_resend_irq(ctx);
22425 + pfm_stats_inc(ovfl_intr_replay_count);
22429 + * always start with full timeout
22431 + new_set->hrtimer_rem = new_set->hrtimer_exp;
22434 + * activate timeout for system-wide, self-monitoring
22435 + * Always start with full timeout
22436 + * Timeout is at least one tick away, so no risk of
22437 + * having hrtimer_start() trying to wakeup softirqd
22438 + * and thus causing trouble. This cannot happen anyway
22439 + * because cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
22441 + if (is_self && new_set->flags & PFM_SETFL_TIME_SWITCH) {
22442 + hrtimer_start(&__get_cpu_var(pfm_hrtimer),
22443 + new_set->hrtimer_rem,
22444 + HRTIMER_MODE_REL);
22446 + PFM_DBG("set%u started timeout=%lld",
22448 + (unsigned long long)new_set->hrtimer_rem.tv64);
22452 + * we restart total duration even if context was
22453 + * already started; in that case the measurement simply restarts.
22456 + * For per-thread, if not self-monitoring, the statement
22457 + * below will have no effect because thread is stopped.
22458 + * The field is reset on ctxsw in.
22460 + new_set->duration_start = sched_clock();
22466 + * __pfm_stop - stop monitoring
22467 + * @ctx: context to operate on
22468 + * @release_info: information for the caller (see below)
22470 + * When operating in per-thread mode and when not self-monitoring,
22471 + * the monitored thread must be stopped.
22473 + * the context is locked and interrupts are disabled.
22475 + * release_info value upon return:
22476 + * - bit 0 : unused
22477 + * - bit 1 : when set, must cancel hrtimer
22479 +int __pfm_stop(struct pfm_context *ctx, int *release_info)
22481 + struct pfm_event_set *set;
22482 + struct task_struct *task;
22486 + *release_info = 0;
22488 + now = sched_clock();
22489 + state = ctx->state;
22490 + set = ctx->active_set;
22493 + * context must be attached (zombie cannot happen)
22495 + if (state == PFM_CTX_UNLOADED)
22498 + task = ctx->task;
22500 + PFM_DBG("ctx_task=[%d] ctx_state=%d is_system=%d",
22501 + task ? task->pid : -1,
22506 + * this happens for system-wide context
22508 + if (task == NULL)
22512 + * compute elapsed time
22514 + * unless masked, compute elapsed duration, stop timeout
22516 + if (task == current && state == PFM_CTX_LOADED) {
22518 + * timeout cancel must be deferred until context is
22519 + * unlocked to avoid race with pfm_handle_switch_timeout()
22521 + if (set->flags & PFM_SETFL_TIME_SWITCH)
22522 + *release_info |= 0x2;
22524 + set->duration += now - set->duration_start;
22527 + pfm_arch_stop(task, ctx);
22529 + ctx->flags.started = 0;
22531 + * starting now, in-flight PMU interrupts for this context
22532 + * are treated as spurious
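The release_info bits imply a two-phase caller: the hrtimer cancellation must
happen after the context lock has been dropped. A minimal sketch of that
calling pattern (the surrounding declarations are assumed; __pfm_stop,
pfm_hrtimer and the bit layout are taken from this patch):

	spin_lock_irqsave(&ctx->lock, flags);
	ret = __pfm_stop(ctx, &release_info);
	spin_unlock_irqrestore(&ctx->lock, flags);

	/* bit 1: cancel the per-CPU switch timeout outside the lock */
	if (release_info & 0x2)
		hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));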
22536 diff --git a/perfmon/perfmon_attach.c b/perfmon/perfmon_attach.c
22537 new file mode 100644
22538 index 0000000..bbd1d1e
22540 +++ b/perfmon/perfmon_attach.c
22543 + * perfmon_attach.c: perfmon2 load/unload functions
22545 + * This file implements the perfmon2 interface which
22546 + * provides access to the hardware performance counters
22547 + * of the host processor.
22550 + * The initial version of perfmon.c was written by
22551 + * Ganesh Venkitachalam, IBM Corp.
22553 + * Then it was modified for perfmon-1.x by Stephane Eranian and
22554 + * David Mosberger, Hewlett Packard Co.
22556 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
22557 + * by Stephane Eranian, Hewlett Packard Co.
22559 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
22560 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
22561 + * David Mosberger-Tang <davidm@hpl.hp.com>
22563 + * More information about perfmon available at:
22564 + * http://perfmon2.sf.net
22566 + * This program is free software; you can redistribute it and/or
22567 + * modify it under the terms of version 2 of the GNU General Public
22568 + * License as published by the Free Software Foundation.
22570 + * This program is distributed in the hope that it will be useful,
22571 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22572 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22573 + * General Public License for more details.
22575 + * You should have received a copy of the GNU General Public License
22576 + * along with this program; if not, write to the Free Software
22577 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22580 +#include <linux/kernel.h>
22581 +#include <linux/fs.h>
22582 +#include <linux/perfmon_kern.h>
22583 +#include "perfmon_priv.h"
22586 + * __pfm_load_context_sys - attach context to a CPU in system-wide mode
22587 + * @ctx: context to operate on
22588 + * @set_id: set to activate first
22589 + * @cpu: CPU to monitor
22591 + * The cpu specified in the pfarg_load.load_pid argument must be the current CPU.
22594 + * The function must be called with the context locked and interrupts disabled.
22596 +static int pfm_load_ctx_sys(struct pfm_context *ctx, u16 set_id, u32 cpu)
22598 + struct pfm_event_set *set;
22602 + mycpu = smp_processor_id();
22605 + * system-wide: check we are running on the desired CPU
22607 + if (cpu != mycpu) {
22608 + PFM_DBG("wrong CPU: asking %u but on %u", cpu, mycpu);
22613 + * initialize sets
22615 + set = pfm_prepare_sets(ctx, set_id);
22617 + PFM_DBG("event set%u does not exist", set_id);
22621 + PFM_DBG("set=%u set_flags=0x%x", set->id, set->flags);
22623 + ctx->cpu = mycpu;
22624 + ctx->task = NULL;
22625 + ctx->active_set = set;
22628 + * perform any architecture specific actions
22630 + ret = pfm_arch_load_context(ctx);
22632 + goto error_noload;
22635 + * now reserve the session, before we can proceed with
22636 + * actually accessing the PMU hardware
22638 + ret = pfm_session_acquire(1, mycpu);
22644 + * caller must be on monitored CPU to access PMU, thus this is
22645 + * a form of self-monitoring
22647 + ctx->flags.is_self = 1;
22652 + * load PMD from set
22653 + * load PMC from set
22655 + pfm_arch_restore_pmds(ctx, set);
22656 + pfm_arch_restore_pmcs(ctx, set);
22659 + * set new ownership
22661 + pfm_set_pmu_owner(NULL, ctx);
22664 + * reset pending work
22666 + ctx->flags.work_type = PFM_WORK_NONE;
22667 + ctx->flags.reset_count = 0;
22670 + * reset message queue
22672 + ctx->msgq_head = ctx->msgq_tail = 0;
22674 + ctx->state = PFM_CTX_LOADED;
22678 + pfm_arch_unload_context(ctx);
22684 + * __pfm_load_context_thread - attach context to a thread
22685 + * @ctx: context to operate on
22686 + * @set_id: first set
22687 + * @task: thread to attach to
22689 + * The function must be called with the context locked and interrupts disabled.
22691 +static int pfm_load_ctx_thread(struct pfm_context *ctx, u16 set_id,
22692 + struct task_struct *task)
22694 + struct pfm_event_set *set;
22695 + struct pfm_context *old;
22698 + PFM_DBG("load_pid=%d set=%u", task->pid, set_id);
22701 + * - task to attach to is checked in sys_pfm_load_context() to avoid
22702 + * locking issues. If found, and not self, the task refcount was incremented.
22705 + old = cmpxchg(&task->pfm_context, NULL, ctx);
22707 + PFM_DBG("load_pid=%d has a context "
22708 + "old=%p new=%p cur=%p",
22712 + task->pfm_context);
22717 + * initialize sets
22719 + set = pfm_prepare_sets(ctx, set_id);
22721 + PFM_DBG("event set%u does not exist", set_id);
22726 + ctx->task = task;
22728 + ctx->active_set = set;
22731 + * perform any architecture specific actions
22733 + ret = pfm_arch_load_context(ctx);
22735 + goto error_noload;
22738 + * now reserve the session, before we can proceed with
22739 + * actually accessing the PMU hardware
22741 + ret = pfm_session_acquire(0, -1);
22747 + if (ctx->task != current) {
22749 + ctx->flags.is_self = 0;
22751 + /* force a full reload */
22752 + ctx->last_act = PFM_INVALID_ACTIVATION;
22753 + ctx->last_cpu = -1;
22754 + set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
22757 + pfm_check_save_prev_ctx();
22759 + ctx->last_cpu = smp_processor_id();
22760 + __get_cpu_var(pmu_activation_number)++;
22761 + ctx->last_act = __get_cpu_var(pmu_activation_number);
22763 + ctx->flags.is_self = 1;
22766 + * load PMD from set
22767 + * load PMC from set
22769 + pfm_arch_restore_pmds(ctx, set);
22770 + pfm_arch_restore_pmcs(ctx, set);
22773 + * set new ownership
22775 + pfm_set_pmu_owner(ctx->task, ctx);
22777 + set_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
22780 + * reset pending work
22782 + ctx->flags.work_type = PFM_WORK_NONE;
22783 + ctx->flags.reset_count = 0;
22786 + * reset message queue
22788 + ctx->msgq_head = ctx->msgq_tail = 0;
22790 + ctx->state = PFM_CTX_LOADED;
22795 + pfm_arch_unload_context(ctx);
22796 + ctx->task = NULL;
22801 + task->pfm_context = NULL;
22806 + * __pfm_load_context - attach context to a CPU or thread
22807 + * @ctx: context to operate on
22808 + * @load: pfarg_load as passed by user
22809 + * @task: thread to attach to, NULL for system-wide
22811 +int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *load,
22812 + struct task_struct *task)
22814 + if (ctx->flags.system)
22815 + return pfm_load_ctx_sys(ctx, load->load_set, load->load_pid);
22816 + return pfm_load_ctx_thread(ctx, load->load_set, task);
22820 + * pfm_update_ovfl_pmds - account for pending ovfls on PMDs
22821 + * @ctx: context to operate on
22823 + * This function is always called after pfm_stop has been issued
22825 +static void pfm_update_ovfl_pmds(struct pfm_context *ctx)
22827 + struct pfm_event_set *set;
22830 + u16 num_ovfls, i, first;
22832 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
22833 + first = ctx->regs.first_intr_pmd;
22834 + cnt_pmds = ctx->regs.cnt_pmds;
22837 + * look for pending interrupts and adjust PMD values accordingly
22839 + list_for_each_entry(set, &ctx->set_list, list) {
22841 + if (!set->npend_ovfls)
22844 + num_ovfls = set->npend_ovfls;
22845 + PFM_DBG("set%u nintrs=%u", set->id, num_ovfls);
22847 + for (i = first; num_ovfls; i++) {
22848 + if (test_bit(i, cast_ulp(set->povfl_pmds))) {
22849 + /* only correct value for counters */
22850 + if (test_bit(i, cast_ulp(cnt_pmds)))
22851 + set->pmds[i].value += 1 + ovfl_mask;
22854 + PFM_DBG("pmd%u set=%u val=0x%llx",
22857 + (unsigned long long)set->pmds[i].value);
22860 + * we need to clear them to prevent pfm_getinfo_evtsets() from
22861 + * returning stale data even after the context is unloaded
22863 + set->npend_ovfls = 0;
22864 + bitmap_zero(cast_ulp(set->povfl_pmds), ctx->regs.max_intr_pmd);
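The "1 + ovfl_mask" increment accounts for exactly one hardware wrap-around.
As a worked example (counter width hypothetical): with a 32-bit wide counter,
ovfl_mask = 0xffffffff, so each pending overflow adds 0x100000000 (2^32) to
the software-maintained 64-bit value, on top of the low-order bits already
captured from the hardware register.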
22870 + * __pfm_unload_context - detach context from CPU or thread
22871 + * @ctx: context to operate on
22872 + * @release_info: pointer to return info (see below)
22874 + * The function must be called with the context locked and interrupts disabled.
22876 + * release_info value upon return:
22877 + * - bit 0: when set, must free context
22878 + * - bit 1: when set, must cancel hrtimer
22880 +int __pfm_unload_context(struct pfm_context *ctx, int *release_info)
22882 + struct task_struct *task;
22885 + PFM_DBG("ctx_state=%d task [%d]",
22887 + ctx->task ? ctx->task->pid : -1);
22889 + *release_info = 0;
22892 + * unload only when necessary
22894 + if (ctx->state == PFM_CTX_UNLOADED)
22897 + task = ctx->task;
22900 + * stop monitoring
22902 + ret = __pfm_stop(ctx, release_info);
22906 + ctx->state = PFM_CTX_UNLOADED;
22907 + ctx->flags.can_restart = 0;
22910 + * save active set
22912 + * if not the current task then, due to lazy save, the state may still be live in the PMU
22914 + * for system-wide, guaranteed to run on correct CPU
22916 + if (__get_cpu_var(pmu_ctx) == ctx) {
22918 + * pending overflows have been saved by pfm_stop()
22920 + pfm_save_pmds(ctx, ctx->active_set);
22921 + pfm_set_pmu_owner(NULL, NULL);
22922 + PFM_DBG("released ownership");
22926 + * account for pending overflows
22928 + pfm_update_ovfl_pmds(ctx);
22931 + * arch-specific unload operations
22933 + pfm_arch_unload_context(ctx);
22936 + * per-thread: disconnect from monitored task
22939 + task->pfm_context = NULL;
22940 + ctx->task = NULL;
22941 + clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
22942 + clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
22943 + pfm_arch_disarm_handle_work(task);
22946 + * the session can only be released with interrupts enabled,
22947 + * thus we defer the release to the caller. Bit 0 signals to the
22948 + * caller that the session can be released.
22950 + *release_info |= 0x1;
22956 + * __pfm_exit_thread - detach and free context on thread exit
22958 +void __pfm_exit_thread(void)
22960 + struct pfm_context *ctx;
22961 + unsigned long flags;
22962 + int free_ok = 0, release_info = 0;
22965 + ctx = current->pfm_context;
22967 + BUG_ON(ctx->flags.system);
22969 + spin_lock_irqsave(&ctx->lock, flags);
22971 + PFM_DBG("state=%d is_self=%d", ctx->state, ctx->flags.is_self);
22974 + * __pfm_unload_context() cannot fail
22975 + * in the context states we are interested in
22977 + switch (ctx->state) {
22978 + case PFM_CTX_LOADED:
22979 + case PFM_CTX_MASKED:
22980 + __pfm_unload_context(ctx, &release_info);
22982 + * end notification only sent for non
22983 + * self-monitoring context
22985 + if (!ctx->flags.is_self)
22986 + pfm_end_notify(ctx);
22988 + case PFM_CTX_ZOMBIE:
22989 + __pfm_unload_context(ctx, &release_info);
22993 + BUG_ON(ctx->state != PFM_CTX_LOADED);
22996 + spin_unlock_irqrestore(&ctx->lock, flags);
22999 + * cancel timer now that context is unlocked
23001 + if (release_info & 0x2) {
23002 + ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
23003 + PFM_DBG("timeout cancel=%d", ret);
23006 + if (release_info & 0x1)
23007 + pfm_session_release(0, 0);
23010 + * All memory free operations (especially for vmalloc'ed memory)
23011 + * MUST be done with interrupts ENABLED.
23014 + pfm_free_context(ctx);
23016 diff --git a/perfmon/perfmon_ctx.c b/perfmon/perfmon_ctx.c
23017 new file mode 100644
23018 index 0000000..afe6078
23020 +++ b/perfmon/perfmon_ctx.c
23023 + * perfmon_ctx.c: perfmon2 context functions
23025 + * This file implements the perfmon2 interface which
23026 + * provides access to the hardware performance counters
23027 + * of the host processor.
23030 + * The initial version of perfmon.c was written by
23031 + * Ganesh Venkitachalam, IBM Corp.
23033 + * Then it was modified for perfmon-1.x by Stephane Eranian and
23034 + * David Mosberger, Hewlett Packard Co.
23036 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
23037 + * by Stephane Eranian, Hewlett Packard Co.
23039 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
23040 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23041 + * David Mosberger-Tang <davidm@hpl.hp.com>
23043 + * More information about perfmon available at:
23044 + * http://perfmon2.sf.net
23046 + * This program is free software; you can redistribute it and/or
23047 + * modify it under the terms of version 2 of the GNU General Public
23048 + * License as published by the Free Software Foundation.
23050 + * This program is distributed in the hope that it will be useful,
23051 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23052 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23053 + * General Public License for more details.
23055 + * You should have received a copy of the GNU General Public License
23056 + * along with this program; if not, write to the Free Software
23057 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23060 +#include <linux/kernel.h>
23061 +#include <linux/fs.h>
23062 +#include <linux/perfmon_kern.h>
23063 +#include "perfmon_priv.h"
23066 + * context memory pool pointer
23068 +static struct kmem_cache *pfm_ctx_cachep;
23071 + * pfm_free_context - de-allocate context and associated resources
23072 + * @ctx: context to free
23074 +void pfm_free_context(struct pfm_context *ctx)
23076 + pfm_arch_context_free(ctx);
23078 + pfm_free_sets(ctx);
23080 + pfm_smpl_buf_free(ctx);
23082 + PFM_DBG("free ctx @0x%p", ctx);
23083 + kmem_cache_free(pfm_ctx_cachep, ctx);
23085 + * decrease refcount on:
23086 + * - PMU description table
23087 + * - sampling format
23089 + pfm_pmu_conf_put();
23090 + pfm_pmu_release();
23094 + * pfm_ctx_flags_sane - check if context flags passed by user are okay
23095 + * @ctx_flags: flags passed user on pfm_create_context
23098 + * 0 if successful
23099 + * <0 and error code otherwise
23101 +static inline int pfm_ctx_flags_sane(u32 ctx_flags)
23103 + if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
23104 + if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
23105 + PFM_DBG("cannot use blocking mode in syswide mode");
23113 + * pfm_ctx_permissions - check authorization to create new context
23114 + * @ctx_flags: context flags passed by user
23116 + * check for permissions to create a context.
23118 + * A sysadmin may decide to restrict creation of per-thread
23119 + * and/or system-wide context to a group of users using the
23120 + * group id via /sys/kernel/perfmon/task_group and
23121 + * /sys/kernel/perfmon/sys_group.
23123 + * Once we identify a user level package which can be used
23124 + * to grant/revoke Linux capabilities at login via PAM, we will
23125 + * be able to use capabilities. We would also need to increase
23126 + * the size of cap_t to support more than 32 capabilities (it
23127 + * is currently defined as u32 and 32 capabilities are already in use).
23130 +static inline int pfm_ctx_permissions(u32 ctx_flags)
23132 + if ((ctx_flags & PFM_FL_SYSTEM_WIDE)
23133 + && pfm_controls.sys_group != PFM_GROUP_PERM_ANY
23134 + && !in_group_p(pfm_controls.sys_group)) {
23135 + PFM_DBG("user group not allowed to create a syswide ctx");
23137 + } else if (pfm_controls.task_group != PFM_GROUP_PERM_ANY
23138 + && !in_group_p(pfm_controls.task_group)) {
23139 + PFM_DBG("user group not allowed to create a task context");
23146 + * __pfm_create_context - allocate and initialize a perfmon context
23147 + * @req : pfarg_ctx from user
23148 + * @fmt : pointer sampling format, NULL if not used
23149 + * @fmt_arg: pointer to argument to sampling format, NULL if not used
23150 + * @mode: PFM_NORMAL or PFM_COMPAT(IA-64 v2.0 compatibility)
23151 + * @ctx : address of new context upon successful return, undefined otherwise
23153 + * function used to allocate a new context. A context is allocated along
23154 + * with the default event set. If a sampling format is used, the buffer
23155 + * may be allocated and initialized.
23157 + * The file descriptor identifying the context is allocated and returned
23160 + * This function operates with no locks and interrupts are enabled.
23162 + * >=0: the file descriptor to identify the context
23163 + * <0 : the error code
23165 +int __pfm_create_context(struct pfarg_ctx *req,
23166 + struct pfm_smpl_fmt *fmt,
23169 + struct pfm_context **new_ctx)
23171 + struct pfm_context *ctx;
23172 + struct file *filp = NULL;
23176 + ctx_flags = req->ctx_flags;
23178 + /* Increase refcount on PMU description */
23179 + ret = pfm_pmu_conf_get(1);
23183 + ret = pfm_ctx_flags_sane(ctx_flags);
23185 + goto error_alloc;
23187 + ret = pfm_ctx_permissions(ctx_flags);
23189 + goto error_alloc;
23192 + * we can use GFP_KERNEL and potentially sleep because we do
23193 + * not hold any lock at this point.
23197 + ctx = kmem_cache_zalloc(pfm_ctx_cachep, GFP_KERNEL);
23199 + goto error_alloc;
23201 + PFM_DBG("alloc ctx @0x%p", ctx);
23203 + INIT_LIST_HEAD(&ctx->set_list);
23204 + spin_lock_init(&ctx->lock);
23205 + init_completion(&ctx->restart_complete);
23206 + init_waitqueue_head(&ctx->msgq_wait);
23209 + * context is unloaded
23211 + ctx->state = PFM_CTX_UNLOADED;
23214 + * initialization of context's flags
23215 + * must be done before pfm_find_set()
23217 + ctx->flags.block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
23218 + ctx->flags.system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
23219 + ctx->flags.no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
23220 + ctx->flags.ia64_v20_compat = mode == PFM_COMPAT ? 1 : 0;
23222 + ret = pfm_pmu_acquire(ctx);
23226 + * check if PMU is usable
23228 + if (!(ctx->regs.num_pmcs && ctx->regs.num_pmds)) {
23229 + PFM_DBG("no usable PMU registers");
23235 + * link to format, must be done first for correct
23236 + * error handling in pfm_free_context()
23238 + ctx->smpl_fmt = fmt;
23241 + fd = pfm_alloc_fd(&filp);
23246 + * initialize arch-specific section
23247 + * must be done before fmt_init()
23249 + ret = pfm_arch_context_create(ctx, ctx_flags);
23256 + * add initial set
23258 + if (pfm_create_initial_set(ctx))
23262 + * does the user want to sample?
23263 + * must be done after pfm_pmu_acquire() because
23264 + * it needs ctx->regs
23267 + ret = pfm_setup_smpl_fmt(ctx, ctx_flags, fmt_arg, filp);
23272 + filp->private_data = ctx;
23274 + ctx->last_act = PFM_INVALID_ACTIVATION;
23275 + ctx->last_cpu = -1;
23278 + * initialize notification message queue
23280 + ctx->msgq_head = ctx->msgq_tail = 0;
23282 + PFM_DBG("flags=0x%x system=%d notify_block=%d no_msg=%d"
23283 + " use_fmt=%d ctx_fd=%d mode=%d",
23285 + ctx->flags.system,
23286 + ctx->flags.block,
23287 + ctx->flags.no_msg,
23295 + * we defer the fd_install until we are certain the call succeeded
23296 + * to ensure we do not have to undo its effect. Neither put_filp()
23297 + * nor put_unused_fd() undoes the effect of fd_install().
23299 + fd_install(fd, filp);
23305 + put_unused_fd(fd);
23308 + * calls the right *_put() functions
23309 + * calls pfm_pmu_release()
23311 + pfm_free_context(ctx);
23314 + pfm_pmu_conf_put();
23316 + pfm_smpl_fmt_put(fmt);
23321 + * pfm_init_ctx -- initialize context SLAB
23323 + * called from pfm_init
23325 +int __init pfm_init_ctx(void)
23327 + pfm_ctx_cachep = kmem_cache_create("pfm_context",
23328 + sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE,
23329 + SLAB_HWCACHE_ALIGN, 0, NULL);
23330 + if (!pfm_ctx_cachep) {
23331 + PFM_ERR("cannot initialize context slab");
23336 diff --git a/perfmon/perfmon_ctxsw.c b/perfmon/perfmon_ctxsw.c
23337 new file mode 100644
23338 index 0000000..9a28d13
23340 +++ b/perfmon/perfmon_ctxsw.c
23343 + * perfmon_ctxsw.c: perfmon2 context switch code
23345 + * This file implements the perfmon2 interface which
23346 + * provides access to the hardware performance counters
23347 + * of the host processor.
23349 + * The initial version of perfmon.c was written by
23350 + * Ganesh Venkitachalam, IBM Corp.
23352 + * Then it was modified for perfmon-1.x by Stephane Eranian and
23353 + * David Mosberger, Hewlett Packard Co.
23355 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
23356 + * by Stephane Eranian, Hewlett Packard Co.
23358 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
23359 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23360 + * David Mosberger-Tang <davidm@hpl.hp.com>
23362 + * More information about perfmon available at:
23363 + * http://perfmon2.sf.net
23365 + * This program is free software; you can redistribute it and/or
23366 + * modify it under the terms of version 2 of the GNU General Public
23367 + * License as published by the Free Software Foundation.
23369 + * This program is distributed in the hope that it will be useful,
23370 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23371 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23372 + * General Public License for more details.
23374 + * You should have received a copy of the GNU General Public License
23375 + * along with this program; if not, write to the Free Software
23376 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23379 +#include <linux/kernel.h>
23380 +#include <linux/perfmon_kern.h>
23381 +#include "perfmon_priv.h"
23383 +void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
23385 + u64 val, ovfl_mask;
23386 + u64 *used_pmds, *cnt_pmds;
23389 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
23390 + num = set->nused_pmds;
23391 + cnt_pmds = ctx->regs.cnt_pmds;
23392 + used_pmds = set->used_pmds;
23395 + * save HW PMDs; for counters, reconstruct the 64-bit value
23397 + for (i = 0; num; i++) {
23398 + if (test_bit(i, cast_ulp(used_pmds))) {
23399 + val = pfm_read_pmd(ctx, i);
23400 + if (likely(test_bit(i, cast_ulp(cnt_pmds))))
23401 + val = (set->pmds[i].value & ~ovfl_mask) |
23402 + (val & ovfl_mask);
23403 + set->pmds[i].value = val;
23407 + pfm_arch_clear_pmd_ovfl_cond(ctx, set);
23411 + * interrupts are disabled (no preemption)
23413 +void __pfm_ctxswin_thread(struct task_struct *task,
23414 + struct pfm_context *ctx, u64 now)
23417 + struct pfm_event_set *set;
23418 + int reload_pmcs, reload_pmds;
23419 + int mycpu, is_active;
23421 + mycpu = smp_processor_id();
23423 + cur_act = __get_cpu_var(pmu_activation_number);
23425 + * we need to lock context because it could be accessed
23426 + * from another CPU. Normally the schedule() function
23427 + * has masked interrupts, which should be enough to
23428 + * protect against PMU interrupts.
23430 + spin_lock(&ctx->lock);
23432 + is_active = pfm_arch_is_active(ctx);
23434 + set = ctx->active_set;
23437 + * in case of zombie, we do not complete ctxswin of the
23438 + * PMU, and we force a call to pfm_handle_work() to finish
23439 + * cleanup, i.e., free context + smpl_buff. The reason for
23440 + * deferring to pfm_handle_work() is that it is not possible
23441 + * to vfree() with interrupts disabled.
23443 + if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) {
23444 + pfm_post_work(task, ctx, PFM_WORK_ZOMBIE);
23449 + * if we were the last user of the PMU on that CPU,
23450 + * then nothing to do except restore psr
23452 + if (ctx->last_cpu == mycpu && ctx->last_act == cur_act) {
23454 + * check for forced reload conditions
23456 + reload_pmcs = set->priv_flags & PFM_SETFL_PRIV_MOD_PMCS;
23457 + reload_pmds = set->priv_flags & PFM_SETFL_PRIV_MOD_PMDS;
23459 +#ifndef CONFIG_SMP
23460 + pfm_check_save_prev_ctx();
23466 + set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
23469 + pfm_arch_restore_pmds(ctx, set);
23472 + * need to check if we had an in-flight interrupt in
23473 + * pfm_ctxswout_thread(). If at least one bit is set, then we must replay
23474 + * the interrupt to avoid losing some important performance data.
23476 + * npend_ovfls is cleared in interrupt handler
23478 + if (set->npend_ovfls) {
23479 + pfm_arch_resend_irq(ctx);
23480 + pfm_stats_inc(ovfl_intr_replay_count);
23484 + pfm_arch_restore_pmcs(ctx, set);
23487 + * record current activation for this context
23489 + __get_cpu_var(pmu_activation_number)++;
23490 + ctx->last_cpu = mycpu;
23491 + ctx->last_act = __get_cpu_var(pmu_activation_number);
23494 + * establish new ownership.
23496 + pfm_set_pmu_owner(task, ctx);
23498 + pfm_arch_ctxswin_thread(task, ctx);
23500 + * set->duration does not count when the context is in MASKED state.
23501 + * set->duration_start is reset in unmask_monitoring()
23503 + set->duration_start = now;
23506 + * re-arm switch timeout, if necessary
23507 + * Timeout is active only if monitoring is active,
23508 + * i.e., LOADED + started
23510 + * We reload the remainder timeout or the full timeout.
23511 + * Remainder is recorded on context switch out or in
23512 + * pfm_load_context()
23514 + if (ctx->state == PFM_CTX_LOADED
23515 + && (set->flags & PFM_SETFL_TIME_SWITCH) && is_active) {
23516 + pfm_restart_timer(ctx, set);
23517 + /* careful here as pfm_restart_timer may switch sets */
23520 + spin_unlock(&ctx->lock);
23524 + * interrupts are masked, runqueue lock is held.
23526 + * In UP, we simply stop monitoring and leave the state
23527 + * in place, i.e., lazy save
23529 +void __pfm_ctxswout_thread(struct task_struct *task,
23530 + struct pfm_context *ctx, u64 now)
23532 + struct pfm_event_set *set;
23533 + int need_save_pmds, is_active;
23536 + * we need to lock context because it could be accessed
23537 + * from another CPU. Normally the schedule() function
23538 + * has masked interrupts, which should be enough to
23539 + * protect against PMU interrupts.
23542 + spin_lock(&ctx->lock);
23544 + is_active = pfm_arch_is_active(ctx);
23545 + set = ctx->active_set;
23548 + * stop monitoring and
23549 + * collect pending overflow information
23550 + * needed on ctxswin. We cannot afford to lose
23551 + * a PMU interrupt.
23553 + need_save_pmds = pfm_arch_ctxswout_thread(task, ctx);
23555 + if (ctx->state == PFM_CTX_LOADED) {
23557 + * accumulate only when set is actively monitoring,
23559 + set->duration += now - set->duration_start;
23562 + * record remaining timeout
23563 + * reload in pfm_ctxsw_in()
23565 + if (is_active && (set->flags & PFM_SETFL_TIME_SWITCH)) {
23566 + struct hrtimer *h = NULL;
23567 + h = &__get_cpu_var(pfm_hrtimer);
23568 + hrtimer_cancel(h);
23569 + set->hrtimer_rem = hrtimer_get_remaining(h);
23570 + PFM_DBG_ovfl("hrtimer=%lld",
23571 + (long long)set->hrtimer_rem.tv64);
23577 + * in SMP, release ownership of this PMU.
23578 + * PMU interrupts are masked, so nothing can race with this release.
23581 + pfm_set_pmu_owner(NULL, NULL);
23584 + * On some architectures, it is necessary to read the
23585 + * PMD registers to check for pending overflow in
23586 + * pfm_arch_ctxswout_thread(). In that case, saving of
23587 + * the PMDs may be done there and not here.
23589 + if (need_save_pmds)
23590 + pfm_save_pmds(ctx, set);
23592 + spin_unlock(&ctx->lock);
23598 +static void __pfm_ctxswout_sys(struct task_struct *prev,
23599 + struct task_struct *next)
23601 + struct pfm_context *ctx;
23603 + ctx = __get_cpu_var(pmu_ctx);
23607 + * propagate TIF_PERFMON_CTXSW to ensure that:
23608 + * - previous task has TIF_PERFMON_CTXSW cleared, in case it is
23609 + * scheduled onto another CPU where there is syswide monitoring
23610 + * - next task has TIF_PERFMON_CTXSW set to ensure it will come back
23611 + * here when context switched out
23613 + clear_tsk_thread_flag(prev, TIF_PERFMON_CTXSW);
23614 + set_tsk_thread_flag(next, TIF_PERFMON_CTXSW);
23617 + * nothing to do until actually started
23618 + * XXX: assumes no means to start from user level
23620 + if (!ctx->flags.started)
23623 + pfm_arch_ctxswout_sys(prev, ctx);
23629 +static void __pfm_ctxswin_sys(struct task_struct *prev,
23630 + struct task_struct *next)
23632 + struct pfm_context *ctx;
23634 + ctx = __get_cpu_var(pmu_ctx);
23638 + * nothing to do until actually started
23639 + * XXX: assumes no means to start from user level
23641 + if (!ctx->flags.started)
23644 + pfm_arch_ctxswin_sys(next, ctx);
23647 +void pfm_ctxsw_out(struct task_struct *prev,
23648 + struct task_struct *next)
23650 + struct pfm_context *ctxp;
23653 + now = sched_clock();
23655 + ctxp = prev->pfm_context;
23658 + __pfm_ctxswout_thread(prev, ctxp, now);
23660 + __pfm_ctxswout_sys(prev, next);
23662 + pfm_stats_inc(ctxswout_count);
23663 + pfm_stats_add(ctxswout_ns, sched_clock() - now);
23666 +void pfm_ctxsw_in(struct task_struct *prev,
23667 + struct task_struct *next)
23669 + struct pfm_context *ctxn;
23672 + now = sched_clock();
23674 + ctxn = next->pfm_context;
23677 + __pfm_ctxswin_thread(next, ctxn, now);
23679 + __pfm_ctxswin_sys(prev, next);
23681 + pfm_stats_inc(ctxswin_count);
23682 + pfm_stats_add(ctxswin_ns, sched_clock() - now);
23684 diff --git a/perfmon/perfmon_debugfs.c b/perfmon/perfmon_debugfs.c
23685 new file mode 100644
23686 index 0000000..e4d2fad
23688 +++ b/perfmon/perfmon_debugfs.c
23691 + * perfmon_debugfs.c: perfmon2 statistics interface to debugfs
23693 + * This file implements the perfmon2 interface which
23694 + * provides access to the hardware performance counters
23695 + * of the host processor.
23697 + * The initial version of perfmon.c was written by
23698 + * Ganesh Venkitachalam, IBM Corp.
23700 + * Then it was modified for perfmon-1.x by Stephane Eranian and
23701 + * David Mosberger, Hewlett Packard Co.
23703 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
23704 + * by Stephane Eranian, Hewlett Packard Co.
23706 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
23707 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23709 + * More information about perfmon available at:
23710 + * http://perfmon2.sf.net
23712 + * This program is free software; you can redistribute it and/or
23713 + * modify it under the terms of version 2 of the GNU General Public
23714 + * License as published by the Free Software Foundation.
23716 + * This program is distributed in the hope that it will be useful,
23717 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23718 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23719 + * General Public License for more details.
23721 + * You should have received a copy of the GNU General Public License
23722 + * along with this program; if not, write to the Free Software
23723 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23726 +#include <linux/kernel.h>
23727 +#include <linux/debugfs.h>
23728 +#include <linux/perfmon_kern.h>
23731 + * to make the statistics visible to user space:
23732 + * $ mount -t debugfs none /mnt
23733 + * $ cd /mnt/perfmon
23734 + * then choose a CPU subdir
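Each per-CPU directory then exposes one file per statistic listed below, e.g.:

	$ cat /mnt/perfmon/cpu0/ovfl_intr_all_count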
23736 +DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
23738 +static struct dentry *pfm_debugfs_dir;
23740 +void pfm_reset_stats(int cpu)
23742 + struct pfm_stats *st;
23743 + unsigned long flags;
23745 + st = &per_cpu(pfm_stats, cpu);
23747 + local_irq_save(flags);
23748 + memset(st->v, 0, sizeof(st->v));
23749 + local_irq_restore(flags);
23752 +static const char *pfm_stats_strs[] = {
23753 + "ovfl_intr_all_count",
23755 + "ovfl_intr_spurious_count",
23756 + "ovfl_intr_replay_count",
23757 + "ovfl_intr_regular_count",
23758 + "handle_work_count",
23759 + "ovfl_notify_count",
23760 + "reset_pmds_count",
23761 + "pfm_restart_count",
23762 + "fmt_handler_calls",
23763 + "fmt_handler_ns",
23764 + "set_switch_count",
23766 + "set_switch_exp",
23769 + "handle_timeout_count",
23770 + "ovfl_intr_nmi_count",
23771 + "ctxswout_count",
23774 +#define PFM_NUM_STRS ARRAY_SIZE(pfm_stats_strs)
23776 +void pfm_debugfs_del_cpu(int cpu)
23778 + struct pfm_stats *st;
23781 + st = &per_cpu(pfm_stats, cpu);
23783 + for (i = 0; i < PFM_NUM_STATS; i++) {
23785 + debugfs_remove(st->dirs[i]);
23786 + st->dirs[i] = NULL;
23789 + debugfs_remove(st->cpu_dir);
23790 + st->cpu_dir = NULL;
23793 +int pfm_debugfs_add_cpu(int cpu)
23795 + struct pfm_stats *st;
23799 + * sanity check between stats names and the number
23800 + * of entries in the pfm_stats value array.
23802 + if (PFM_NUM_STRS != PFM_NUM_STATS) {
23803 + PFM_ERR("PFM_NUM_STRS != PFM_NUM_STATS error");
23807 + st = &per_cpu(pfm_stats, cpu);
23808 + sprintf(st->cpu_name, "cpu%d", cpu);
23810 + st->cpu_dir = debugfs_create_dir(st->cpu_name, pfm_debugfs_dir);
23811 + if (!st->cpu_dir)
23814 + for (i = 0; i < PFM_NUM_STATS; i++) {
23815 + st->dirs[i] = debugfs_create_u64(pfm_stats_strs[i],
23819 + if (!st->dirs[i])
23822 + pfm_reset_stats(cpu);
23826 + debugfs_remove(st->dirs[i]);
23829 + debugfs_remove(st->cpu_dir);
23834 + * called once from pfm_init()
23836 +int __init pfm_init_debugfs(void)
23838 + int cpu1, cpu2, ret;
23840 + pfm_debugfs_dir = debugfs_create_dir("perfmon", NULL);
23841 + if (!pfm_debugfs_dir)
23844 + for_each_online_cpu(cpu1) {
23845 + ret = pfm_debugfs_add_cpu(cpu1);
23851 + for_each_online_cpu(cpu2) {
23852 + if (cpu2 == cpu1)
23854 + pfm_debugfs_del_cpu(cpu2);
23858 diff --git a/perfmon/perfmon_dfl_smpl.c b/perfmon/perfmon_dfl_smpl.c
23859 new file mode 100644
23860 index 0000000..8c83489
23862 +++ b/perfmon/perfmon_dfl_smpl.c
23865 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
23866 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23868 + * This file implements the new default sampling buffer format
23869 + * for the perfmon2 subsystem.
23871 + * This program is free software; you can redistribute it and/or
23872 + * modify it under the terms of version 2 of the GNU General Public
23873 + * License as published by the Free Software Foundation.
23875 + * This program is distributed in the hope that it will be useful,
23876 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23877 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23878 + * General Public License for more details.
23880 + * You should have received a copy of the GNU General Public License
23881 + * along with this program; if not, write to the Free Software
23882 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23885 +#include <linux/kernel.h>
23886 +#include <linux/types.h>
23887 +#include <linux/module.h>
23888 +#include <linux/init.h>
23889 +#include <linux/smp.h>
23891 +#include <linux/perfmon_kern.h>
23892 +#include <linux/perfmon_dfl_smpl.h>
23894 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
23895 +MODULE_DESCRIPTION("new perfmon default sampling format");
23896 +MODULE_LICENSE("GPL");
23898 +static int pfm_dfl_fmt_validate(u32 ctx_flags, u16 npmds, void *data)
23900 + struct pfm_dfl_smpl_arg *arg = data;
23901 + u64 min_buf_size;
23903 + if (data == NULL) {
23904 + PFM_DBG("no argument passed");
23909 + * sanity check in case size_t is smaller than u64
23911 +#if BITS_PER_LONG == 32
23912 +#define MAX_SIZE_T (1ULL<<(sizeof(size_t)<<3))
23913 + if (sizeof(size_t) < sizeof(arg->buf_size)) {
23914 + if (arg->buf_size >= MAX_SIZE_T)
23920 + * compute min buf size. npmds is the maximum number
23921 + * of implemented PMD registers.
23923 + min_buf_size = sizeof(struct pfm_dfl_smpl_hdr)
23924 + + (sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64)));
23926 + PFM_DBG("validate ctx_flags=0x%x flags=0x%x npmds=%u "
23927 + "min_buf_size=%llu buf_size=%llu\n",
23931 + (unsigned long long)min_buf_size,
23932 + (unsigned long long)arg->buf_size);
23935 + * must hold at least the buffer header + one minimally sized entry
23937 + if (arg->buf_size < min_buf_size)
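As a concrete example (PMD count hypothetical): with npmds = 4, the buffer must
hold at least sizeof(struct pfm_dfl_smpl_hdr) + sizeof(struct pfm_dfl_smpl_entry)
+ 4*8 bytes, i.e., the header plus one entry recording all four PMD values.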
23943 +static int pfm_dfl_fmt_get_size(u32 flags, void *data, size_t *size)
23945 + struct pfm_dfl_smpl_arg *arg = data;
23948 + * size has been validated in pfm_dfl_fmt_validate(),
23949 + * so we can never lose bits from buf_size.
23951 + *size = (size_t)arg->buf_size;
23956 +static int pfm_dfl_fmt_init(struct pfm_context *ctx, void *buf, u32 ctx_flags,
23957 + u16 npmds, void *data)
23959 + struct pfm_dfl_smpl_hdr *hdr;
23960 + struct pfm_dfl_smpl_arg *arg = data;
23964 + hdr->hdr_version = PFM_DFL_SMPL_VERSION;
23965 + hdr->hdr_buf_size = arg->buf_size;
23966 + hdr->hdr_buf_flags = arg->buf_flags;
23967 + hdr->hdr_cur_offs = sizeof(*hdr);
23968 + hdr->hdr_overflows = 0;
23969 + hdr->hdr_count = 0;
23970 + hdr->hdr_min_buf_space = sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64));
23972 + * due to cache aliasing, it may be necessary to flush the cache
23973 + * on certain architectures (e.g., MIPS)
23975 + pfm_cacheflush(hdr, sizeof(*hdr));
23977 + PFM_DBG("buffer=%p buf_size=%llu hdr_size=%zu hdr_version=%u.%u "
23978 + "min_space=%llu npmds=%u",
23980 + (unsigned long long)hdr->hdr_buf_size,
23982 + PFM_VERSION_MAJOR(hdr->hdr_version),
23983 + PFM_VERSION_MINOR(hdr->hdr_version),
23984 + (unsigned long long)hdr->hdr_min_buf_space,
23991 + * called from pfm_overflow_handler() to record a new sample
23993 + * context is locked, interrupts are disabled (no preemption)
23995 +static int pfm_dfl_fmt_handler(struct pfm_context *ctx,
23996 + unsigned long ip, u64 tstamp, void *data)
23998 + struct pfm_dfl_smpl_hdr *hdr;
23999 + struct pfm_dfl_smpl_entry *ent;
24000 + struct pfm_ovfl_arg *arg;
24001 + void *cur, *last;
24003 + size_t entry_size, min_size;
24008 + hdr = ctx->smpl_addr;
24009 + arg = &ctx->ovfl_arg;
24012 + cur = buf+hdr->hdr_cur_offs;
24013 + last = buf+hdr->hdr_buf_size;
24014 + ovfl_pmd = arg->ovfl_pmd;
24015 + min_size = hdr->hdr_min_buf_space;
24018 + * precheck for sanity
24020 + if ((last - cur) < min_size)
24023 + npmds = arg->num_smpl_pmds;
24025 + ent = (struct pfm_dfl_smpl_entry *)cur;
24027 + entry_size = sizeof(*ent) + (npmds << 3);
24029 + /* position for first pmd */
24030 + e = (u64 *)(ent+1);
24032 + hdr->hdr_count++;
24034 + PFM_DBG_ovfl("count=%llu cur=%p last=%p free_bytes=%zu ovfl_pmd=%d "
24036 + (unsigned long long)hdr->hdr_count,
24043 + * current = task running at the time of the overflow.
24046 + * - this is usually the task being monitored.
24047 + * Under certain conditions, it might be a different task
24050 + * - this is not necessarily the task controlling the session
24052 + ent->pid = current->pid;
24053 + ent->ovfl_pmd = ovfl_pmd;
24054 + ent->last_reset_val = arg->pmd_last_reset;
24057 + * where did the fault happen (includes slot number)
24061 + ent->tstamp = tstamp;
24062 + ent->cpu = smp_processor_id();
24063 + ent->set = arg->active_set;
24064 + ent->tgid = current->tgid;
24067 + * selectively store PMDs in increasing index number
24070 + u64 *val = arg->smpl_pmds_values;
24071 + for (i = 0; i < npmds; i++)
24076 + * update position for next entry
24078 + hdr->hdr_cur_offs += entry_size;
24079 + cur += entry_size;
24081 + pfm_cacheflush(hdr, sizeof(*hdr));
24082 + pfm_cacheflush(ent, entry_size);
24085 + * post check to avoid losing the last sample
24087 + if ((last - cur) < min_size)
24090 + /* reset before returning from interrupt handler */
24091 + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
24095 + PFM_DBG_ovfl("sampling buffer full free=%zu, count=%llu",
24097 + (unsigned long long)hdr->hdr_count);
24100 + * increment number of buffer overflows.
24101 + * important to detect duplicate sets of samples.
24103 + hdr->hdr_overflows++;
24106 + * request notification and masking of monitoring.
24107 + * Notification is still subject to the overflowed
24108 + * register having the FL_NOTIFY flag set.
24110 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
24112 + return -ENOBUFS; /* we are full, sorry */
24115 +static int pfm_dfl_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
24117 + struct pfm_dfl_smpl_hdr *hdr;
24121 + hdr->hdr_count = 0;
24122 + hdr->hdr_cur_offs = sizeof(*hdr);
24124 + pfm_cacheflush(hdr, sizeof(*hdr));
24126 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
24131 +static int pfm_dfl_fmt_exit(void *buf)
24136 +static struct pfm_smpl_fmt dfl_fmt = {
24137 + .fmt_name = "default",
24138 + .fmt_version = 0x10000,
24139 + .fmt_arg_size = sizeof(struct pfm_dfl_smpl_arg),
24140 + .fmt_validate = pfm_dfl_fmt_validate,
24141 + .fmt_getsize = pfm_dfl_fmt_get_size,
24142 + .fmt_init = pfm_dfl_fmt_init,
24143 + .fmt_handler = pfm_dfl_fmt_handler,
24144 + .fmt_restart = pfm_dfl_fmt_restart,
24145 + .fmt_exit = pfm_dfl_fmt_exit,
24146 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
24147 + .owner = THIS_MODULE
24150 +static int pfm_dfl_fmt_init_module(void)
24152 + return pfm_fmt_register(&dfl_fmt);
24155 +static void pfm_dfl_fmt_cleanup_module(void)
24157 + pfm_fmt_unregister(&dfl_fmt);
24160 +module_init(pfm_dfl_fmt_init_module);
24161 +module_exit(pfm_dfl_fmt_cleanup_module);
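To give a feel for how a user-level tool would consume this format, here is a
minimal sketch: it assumes the format structures are exported to user space via
a perfmon_dfl_smpl.h header, that the buffer was mmap()ed from the context file
descriptor, and that every sample records the same, known number of PMD values
(n_smpl_pmds), determined by the smpl_pmds bitmasks the tool programmed.

	#include <stdio.h>
	#include <stdint.h>
	#include <perfmon/perfmon_dfl_smpl.h>	/* assumed user-space export */

	static void walk_smpl_buffer(void *buf, unsigned int n_smpl_pmds)
	{
		struct pfm_dfl_smpl_hdr *hdr = buf;
		char *pos = (char *)buf + sizeof(*hdr);
		uint64_t i;

		for (i = 0; i < hdr->hdr_count; i++) {
			struct pfm_dfl_smpl_entry *ent = (struct pfm_dfl_smpl_entry *)pos;
			const uint64_t *pmds = (const uint64_t *)(ent + 1);

			printf("pid=%d cpu=%d set=%d ovfl_pmd=%d pmd[0]=0x%llx\n",
			       (int)ent->pid, (int)ent->cpu, (int)ent->set,
			       (int)ent->ovfl_pmd,
			       (unsigned long long)pmds[0]);

			/* each entry is followed by n_smpl_pmds 64-bit PMD values */
			pos += sizeof(*ent) + n_smpl_pmds * sizeof(uint64_t);
		}
	}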
24162 diff --git a/perfmon/perfmon_file.c b/perfmon/perfmon_file.c
24163 new file mode 100644
24164 index 0000000..1cde81b
24166 +++ b/perfmon/perfmon_file.c
24169 + * perfmon_file.c: perfmon2 file input/output functions
24171 + * This file implements the perfmon2 interface which
24172 + * provides access to the hardware performance counters
24173 + * of the host processor.
24175 + * The initial version of perfmon.c was written by
24176 + * Ganesh Venkitachalam, IBM Corp.
24178 + * Then it was modified for perfmon-1.x by Stephane Eranian and
24179 + * David Mosberger, Hewlett Packard Co.
24181 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
24182 + * by Stephane Eranian, Hewlett Packard Co.
24184 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
24185 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
24186 + * David Mosberger-Tang <davidm@hpl.hp.com>
24188 + * More information about perfmon available at:
24189 + * http://perfmon2.sf.net
24191 + * This program is free software; you can redistribute it and/or
24192 + * modify it under the terms of version 2 of the GNU General Public
24193 + * License as published by the Free Software Foundation.
24195 + * This program is distributed in the hope that it will be useful,
24196 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24197 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24198 + * General Public License for more details.
24200 + * You should have received a copy of the GNU General Public License
24201 + * along with this program; if not, write to the Free Software
24202 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24205 +#include <linux/kernel.h>
24206 +#include <linux/module.h>
24207 +#include <linux/file.h>
24208 +#include <linux/poll.h>
24209 +#include <linux/vfs.h>
24210 +#include <linux/pagemap.h>
24211 +#include <linux/mount.h>
24212 +#include <linux/perfmon_kern.h>
24213 +#include "perfmon_priv.h"
24215 +#define PFMFS_MAGIC 0xa0b4d889 /* perfmon filesystem magic number */
24217 +struct pfm_controls pfm_controls = {
24218 + .sys_group = PFM_GROUP_PERM_ANY,
24219 + .task_group = PFM_GROUP_PERM_ANY,
24220 + .arg_mem_max = PAGE_SIZE,
24221 + .smpl_buffer_mem_max = ~0,
24223 +EXPORT_SYMBOL(pfm_controls);
24225 +static int __init enable_debug(char *str)
24227 + pfm_controls.debug = 1;
24228 + PFM_INFO("debug output enabled\n");
24231 +__setup("perfmon_debug", enable_debug);
24233 +static int pfmfs_delete_dentry(struct dentry *dentry)
24238 +static struct dentry_operations pfmfs_dentry_operations = {
24239 + .d_delete = pfmfs_delete_dentry,
24242 +int pfm_buf_map_pagefault(struct vm_area_struct *vma, struct vm_fault *vmf)
24245 + unsigned long address;
24246 + struct pfm_context *ctx;
24249 + address = (unsigned long)vmf->virtual_address;
24251 + ctx = vma->vm_private_data;
24252 + if (ctx == NULL) {
24253 + PFM_DBG("no ctx");
24254 + return VM_FAULT_SIGBUS;
24257 + * size available to user (may be different from real_smpl_size)
24259 + size = ctx->smpl_size;
24261 + if ((address < vma->vm_start) ||
24262 + (address >= (vma->vm_start + size)))
24263 + return VM_FAULT_SIGBUS;
24265 + kaddr = ctx->smpl_addr + (address - vma->vm_start);
24267 + vmf->page = vmalloc_to_page(kaddr);
24268 + get_page(vmf->page);
24270 + PFM_DBG("[%d] start=%p ref_count=%d",
24272 + kaddr, page_count(vmf->page));
24278 + * we need to determine whether or not we are closing the last reference
24279 + * to the file and thus are going to end up in pfm_close() which eventually
24280 + * calls pfm_release_buf_space(). In that function, we update the accounting
24281 + * for locked_vm given that we are actually freeing the sampling buffer. The
24282 + * issue is that there are multiple paths leading to pfm_release_buf_space(),
24283 + * from exit(), munmap(), close(). The path coming from munmap() is problematic
24284 + * because do_munmap() grabs mmap_sem in write-mode, which is also what
24285 + * pfm_release_buf_space does. To avoid deadlock, we need to determine where
24286 + * we are calling from and skip the locking. The vm_ops->close() callback
24287 + * is invoked for each remove_vma() independently of the number of references
24288 + * left on the file descriptor, therefore a simple reference counter does not
24289 + * work. We need to determine if this is the last call, and then set a flag
24290 + * to skip the locking.
24292 +static void pfm_buf_map_close(struct vm_area_struct *vma)
24294 + struct file *file;
24295 + struct pfm_context *ctx;
24297 + file = vma->vm_file;
24298 + ctx = vma->vm_private_data;
24301 + * if file is going to close, then pfm_close() will
24302 + * be called, do not lock in pfm_release_buf
24304 + if (atomic_read(&file->f_count) == 1)
24305 + ctx->flags.mmap_nlock = 1;
24309 + * the close callback does not free the buffer: the locked
24310 + * memory accounting must be done when the actual buffer
24311 + * is freed. Munmap does not free the pages backing the vma
24312 + * because they may still be in use by the PMU interrupt handler.
24314 +struct vm_operations_struct pfm_buf_map_vm_ops = {
24315 + .fault = pfm_buf_map_pagefault,
24316 + .close = pfm_buf_map_close
24319 +static int pfm_mmap_buffer(struct pfm_context *ctx, struct vm_area_struct *vma,
24322 + if (ctx->smpl_addr == NULL) {
24323 + PFM_DBG("no sampling buffer to map");
24327 + if (size > ctx->smpl_size) {
24328 + PFM_DBG("mmap size=%zu >= actual buf size=%zu",
24334 + vma->vm_ops = &pfm_buf_map_vm_ops;
24335 + vma->vm_private_data = ctx;
24340 +static int pfm_mmap(struct file *file, struct vm_area_struct *vma)
24343 + struct pfm_context *ctx;
24344 + unsigned long flags;
24347 + PFM_DBG("pfm_file_ops");
24349 + ctx = file->private_data;
24350 + size = (vma->vm_end - vma->vm_start);
24357 + spin_lock_irqsave(&ctx->lock, flags);
24359 + if (vma->vm_flags & VM_WRITE) {
24360 + PFM_DBG("cannot map buffer for writing");
24364 + PFM_DBG("vm_pgoff=%lu size=%zu vm_start=0x%lx",
24369 + ret = pfm_mmap_buffer(ctx, vma, size);
24371 + vma->vm_flags |= VM_RESERVED;
24373 + PFM_DBG("ret=%d vma_flags=0x%lx vma_start=0x%lx vma_size=%lu",
24377 + vma->vm_end-vma->vm_start);
24379 + spin_unlock_irqrestore(&ctx->lock, flags);
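From user space the sampling buffer is mapped read-only over the context file
descriptor (pfm_mmap() above rejects VM_WRITE). A minimal sketch, assuming
ctx_fd is the context descriptor and buf_size the size passed in the
sampling-format argument:

	#include <sys/mman.h>
	#include <err.h>

	static void *map_smpl_buffer(int ctx_fd, size_t buf_size)
	{
		/* write mappings are rejected, so PROT_READ only */
		void *buf = mmap(NULL, buf_size, PROT_READ, MAP_PRIVATE, ctx_fd, 0);
		if (buf == MAP_FAILED)
			err(1, "cannot mmap sampling buffer");
		return buf;
	}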
24385 + * Extract one message from queue.
24388 + * -EAGAIN: when non-blocking and nothing is in the queue.
24389 + * -ERESTARTSYS: when blocking and signal is pending
24390 + * Otherwise returns size of message (sizeof(pfarg_msg))
24392 +ssize_t __pfm_read(struct pfm_context *ctx, union pfarg_msg *msg_buf, int non_block)
24395 + unsigned long flags;
24396 + DECLARE_WAITQUEUE(wait, current);
24399 + * we must mask interrupts to avoid a race condition
24400 + * with the PMU interrupt handler.
24402 + spin_lock_irqsave(&ctx->lock, flags);
24404 + while (pfm_msgq_is_empty(ctx)) {
24407 + * handle non-blocking reads
24414 + add_wait_queue(&ctx->msgq_wait, &wait);
24415 + set_current_state(TASK_INTERRUPTIBLE);
24417 + spin_unlock_irqrestore(&ctx->lock, flags);
24422 + * during this window, another thread may call
24423 + * pfm_read() and steal our message
24426 + spin_lock_irqsave(&ctx->lock, flags);
24428 + remove_wait_queue(&ctx->msgq_wait, &wait);
24429 + set_current_state(TASK_RUNNING);
24432 + * check for pending signals
24433 + * return -ERESTARTSYS
24435 + ret = -ERESTARTSYS;
24436 + if (signal_pending(current))
24440 + * we may have a message
24446 + * extract message
24450 + * copy the oldest message into msg_buf.
24451 + * We cannot directly call copy_to_user()
24452 + * because interrupts are masked. This is done
24455 + pfm_get_next_msg(ctx, msg_buf);
24457 + ret = sizeof(*msg_buf);
24459 + PFM_DBG("extracted type=%d", msg_buf->type);
24462 + spin_unlock_irqrestore(&ctx->lock, flags);
24464 + PFM_DBG("blocking=%d ret=%zd", non_block, ret);
24469 +static ssize_t pfm_read(struct file *filp, char __user *buf, size_t size,
24472 + struct pfm_context *ctx;
24473 + union pfarg_msg msg_buf;
24474 + int non_block, ret;
24476 + PFM_DBG_ovfl("buf=%p size=%zu", buf, size);
24478 + ctx = filp->private_data;
24479 + if (ctx == NULL) {
24480 + PFM_ERR("no ctx for pfm_read");
24484 + non_block = filp->f_flags & O_NONBLOCK;
24486 +#ifdef CONFIG_IA64_PERFMON_COMPAT
24488 + * detect IA-64 v2.0 context read (message size is different)
24489 + * a nop on all other architectures
24491 + if (unlikely(ctx->flags.ia64_v20_compat))
24492 + return pfm_arch_compat_read(ctx, buf, non_block, size);
24495 + * cannot extract partial messages.
24496 + * check even when there is no message
24498 + * cannot extract more than one message per call. Bytes
24499 + * above sizeof(msg) are ignored.
24501 + if (size < sizeof(msg_buf)) {
24502 + PFM_DBG("message is too small size=%zu must be >=%zu)",
24504 + sizeof(msg_buf));
24508 + ret = __pfm_read(ctx, &msg_buf, non_block);
24510 + if (copy_to_user(buf, &msg_buf, sizeof(msg_buf)))
24513 + PFM_DBG_ovfl("ret=%d", ret);
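The corresponding user-level read is one full message per call; anything
smaller than sizeof(pfarg_msg) is rejected. A minimal sketch, assuming union
pfarg_msg is available from a user-space perfmon header:

	#include <unistd.h>
	#include <perfmon/perfmon.h>	/* assumed header providing union pfarg_msg */

	/* block (or fail with EAGAIN if O_NONBLOCK) until one message arrives */
	static int read_one_msg(int ctx_fd, union pfarg_msg *msg)
	{
		ssize_t n = read(ctx_fd, msg, sizeof(*msg));
		return n == (ssize_t)sizeof(*msg) ? 0 : -1;
	}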
24517 +static ssize_t pfm_write(struct file *file, const char __user *ubuf,
24518 + size_t size, loff_t *ppos)
24520 + PFM_DBG("pfm_write called");
24524 +static unsigned int pfm_poll(struct file *filp, poll_table *wait)
24526 + struct pfm_context *ctx;
24527 + unsigned long flags;
24528 + unsigned int mask = 0;
24530 + PFM_DBG("pfm_file_ops");
24532 + if (filp->f_op != &pfm_file_ops) {
24533 + PFM_ERR("pfm_poll bad magic");
24537 + ctx = filp->private_data;
24538 + if (ctx == NULL) {
24539 + PFM_ERR("pfm_poll no ctx");
24543 + PFM_DBG("before poll_wait");
24545 + poll_wait(filp, &ctx->msgq_wait, wait);
24548 + * pfm_msgq_is_empty() is non-atomic
24550 + * filp is protected by fget() at the upper level, so the
24551 + * context cannot be closed by another thread.
24553 + * There may be a race with a PMU interrupt adding
24554 + * messages to the queue. But we are interested in
24555 + * queue not empty, so adding more messages should
24556 + * not really be a problem.
24558 + * There may be a race with another thread issuing
24559 + * a read() and stealing messages from the queue thus
24560 + * may return the wrong answer. This could potentially
24561 + * lead to a blocking read, because nothing is
24562 + * available in the queue
24564 + spin_lock_irqsave(&ctx->lock, flags);
24566 + if (!pfm_msgq_is_empty(ctx))
24567 + mask = POLLIN | POLLRDNORM;
24569 + spin_unlock_irqrestore(&ctx->lock, flags);
24571 + PFM_DBG("after poll_wait mask=0x%x", mask);
24576 +static int pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
24577 + unsigned long arg)
24579 + PFM_DBG("pfm_ioctl called");
24584 + * interrupt cannot be masked when entering this function
24586 +static inline int __pfm_fasync(int fd, struct file *filp,
24587 + struct pfm_context *ctx, int on)
24591 + PFM_DBG("in fd=%d on=%d async_q=%p",
24594 + ctx->async_queue);
24596 + ret = fasync_helper(fd, filp, on, &ctx->async_queue);
24598 + PFM_DBG("out fd=%d on=%d async_q=%p ret=%d",
24601 + ctx->async_queue, ret);
24606 +static int pfm_fasync(int fd, struct file *filp, int on)
24608 + struct pfm_context *ctx;
24611 + PFM_DBG("pfm_file_ops");
24613 + ctx = filp->private_data;
24614 + if (ctx == NULL) {
24615 + PFM_ERR("pfm_fasync no ctx");
24620 + * we cannot mask interrupts during this call because it
24621 + * may go to sleep if memory is not readily available.
24623 + * We are protected from the context disappearing by the
24624 + * get_fd()/put_fd() done in caller. Serialization of this function
24625 + * is ensured by caller.
24627 + ret = __pfm_fasync(fd, filp, ctx, on);
24629 + PFM_DBG("pfm_fasync called on fd=%d on=%d async_queue=%p ret=%d",
24632 + ctx->async_queue, ret);
24638 +static void __pfm_close_remote_cpu(void *info)
24640 + struct pfm_context *ctx = info;
24643 + BUG_ON(ctx != __get_cpu_var(pmu_ctx));
24646 + * we are in the IPI interrupt handler, which always has higher
24647 + * priority than the PMU interrupt, therefore we do not need to
24648 + * mask interrupts. context locking is not needed because we
24649 + * are in close(), no more user references.
24651 + * can_release is ignored, release done on calling CPU
24653 + __pfm_unload_context(ctx, &can_release);
24656 + * we cannot free context here because we are in_interrupt().
24657 + * we free on the calling CPU
24661 +static int pfm_close_remote_cpu(u32 cpu, struct pfm_context *ctx)
24663 + BUG_ON(irqs_disabled());
24664 + return smp_call_function_single(cpu, __pfm_close_remote_cpu, ctx, 1);
24666 +#endif /* CONFIG_SMP */
24669 + * called either on explicit close() or from exit_files().
24670 + * Only the LAST user of the file gets to this point, i.e., it is
24671 + * called only ONCE.
24673 + * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
24674 + * (fput()),i.e, last task to access the file. Nobody else can access the
24675 + * file at this point.
24677 + * When called from exit_files(), the VMA has been freed because exit_mm()
24678 + * is executed before exit_files().
24680 + * When called from exit_files(), the current task is not yet ZOMBIE but we
24681 + * flush the PMU state to the context.
24683 +int __pfm_close(struct pfm_context *ctx, struct file *filp)
24685 + unsigned long flags;
24687 + int can_free = 1, can_unload = 1;
24688 + int is_system, can_release = 0;
24692 + * no risk of ctx or filp disappearing, so we can operate outside
24693 + * of spin_lock(). fasync_helper() runs with interrupts masked,
24694 + * thus there is no risk with the PMU interrupt handler
24696 + * In case of zombie, we will not have the async struct anymore
24697 + * thus kill_fasync() will not do anything
24699 + * fd is not used when removing the entry so we pass -1
24701 + if (filp->f_flags & FASYNC)
24702 + __pfm_fasync (-1, filp, ctx, 0);
24704 + spin_lock_irqsave(&ctx->lock, flags);
24706 + state = ctx->state;
24707 + is_system = ctx->flags.system;
24710 + PFM_DBG("state=%d", state);
24713 + * check if unload is needed
24715 + if (state == PFM_CTX_UNLOADED)
24720 + * we need to release the resource on the ORIGINAL cpu.
24721 + * we need to release the context lock to avoid deadlocks
24722 + * on the original CPU, especially in the context switch
24723 + * routines. It is safe to unlock because we are in close(),
24724 + * in other words, there is no more access from user level.
24725 + * we can also unmask interrupts on this CPU because the
24726 + * context is running on the original CPU. Context will be
24727 + * unloaded and the session will be released on the original
24728 + * CPU. Upon return, the caller is guaranteed that the context
24729 + * is gone from original CPU.
24731 + if (is_system && cpu != smp_processor_id()) {
24732 + spin_unlock_irqrestore(&ctx->lock, flags);
24733 + pfm_close_remote_cpu(cpu, ctx);
24738 + if (!is_system && ctx->task != current) {
24740 + * switch context to zombie state
24742 + ctx->state = PFM_CTX_ZOMBIE;
24744 + PFM_DBG("zombie ctx for [%d]", ctx->task->pid);
24746 + * must check if the other thread is using blocking overflow
24747 + * notification mode. If so, make sure it will not block
24748 + * because there will not be any pfm_restart() issued.
24749 + * When the thread notices the ZOMBIE state, it will clean
24750 + * up what is left of the context
24752 + if (state == PFM_CTX_MASKED && ctx->flags.block) {
24754 + * force task to wake up from MASKED state
24756 + PFM_DBG("waking up [%d]", ctx->task->pid);
24758 + complete(&ctx->restart_complete);
24761 + * PMU session will be released by the monitored task when it notices
24762 + * ZOMBIE state as part of pfm_unload_context()
24764 + can_unload = can_free = 0;
24768 + __pfm_unload_context(ctx, &can_release);
24770 + spin_unlock_irqrestore(&ctx->lock, flags);
24776 + pfm_session_release(is_system, cpu);
24779 + pfm_free_context(ctx);
24784 +static int pfm_close(struct inode *inode, struct file *filp)
24786 + struct pfm_context *ctx;
24788 + PFM_DBG("called filp=%p", filp);
24790 + ctx = filp->private_data;
24791 + if (ctx == NULL) {
24792 + PFM_ERR("no ctx");
24795 + return __pfm_close(ctx, filp);
24798 +static int pfm_no_open(struct inode *irrelevant, struct file *dontcare)
24800 + PFM_DBG("pfm_file_ops");
24806 +const struct file_operations pfm_file_ops = {
24807 + .llseek = no_llseek,
24808 + .read = pfm_read,
24809 + .write = pfm_write,
24810 + .poll = pfm_poll,
24811 + .ioctl = pfm_ioctl,
24812 + .open = pfm_no_open, /* special open to disallow open via /proc */
24813 + .fasync = pfm_fasync,
24814 + .release = pfm_close,
24818 +static int pfmfs_get_sb(struct file_system_type *fs_type,
24819 + int flags, const char *dev_name,
24820 + void *data, struct vfsmount *mnt)
24822 + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
24825 +static struct file_system_type pfm_fs_type = {
24827 + .get_sb = pfmfs_get_sb,
24828 + .kill_sb = kill_anon_super,
24832 + * pfmfs should _never_ be mounted by userland - too much of security hassle,
24833 + * no real gain from having the whole whorehouse mounted. So we don't need
24834 + * any operations on the root directory. However, we need a non-trivial
24835 + * d_name - pfm: will go nicely and kill the special-casing in procfs.
24837 +static struct vfsmount *pfmfs_mnt;
24839 +int __init pfm_init_fs(void)
24841 + int err = register_filesystem(&pfm_fs_type);
24843 + pfmfs_mnt = kern_mount(&pfm_fs_type);
24844 + err = PTR_ERR(pfmfs_mnt);
24845 + if (IS_ERR(pfmfs_mnt))
24846 + unregister_filesystem(&pfm_fs_type);
24853 +int pfm_alloc_fd(struct file **cfile)
24856 + struct file *file = NULL;
24857 + struct inode * inode;
24859 + struct qstr this;
24861 + fd = get_unused_fd();
24867 + file = get_empty_filp();
24872 + * allocate a new inode
24874 + inode = new_inode(pfmfs_mnt->mnt_sb);
24878 + PFM_DBG("new inode ino=%ld @%p", inode->i_ino, inode);
24880 + inode->i_sb = pfmfs_mnt->mnt_sb;
24881 + inode->i_mode = S_IFCHR|S_IRUGO;
24882 + inode->i_uid = current->fsuid;
24883 + inode->i_gid = current->fsgid;
24885 + sprintf(name, "[%lu]", inode->i_ino);
24886 + this.name = name;
24887 + this.hash = inode->i_ino;
24888 + this.len = strlen(name);
24893 + * allocate a new dcache entry
24895 + file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
24896 + if (!file->f_dentry)
24899 + file->f_dentry->d_op = &pfmfs_dentry_operations;
24901 + d_add(file->f_dentry, inode);
24902 + file->f_vfsmnt = mntget(pfmfs_mnt);
24903 + file->f_mapping = inode->i_mapping;
24905 + file->f_op = &pfm_file_ops;
24906 + file->f_mode = FMODE_READ;
24907 + file->f_flags = O_RDONLY;
24916 + put_unused_fd(fd);
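The visible part of pfm_alloc_fd() stops in its error path; the standard way such a helper (or its caller) completes the job is to publish the fully set up struct file in the descriptor table with fd_install(). Whether that happens inside pfm_alloc_fd() or in its caller is outside this excerpt, so the sketch below only shows the generic get_unused_fd()/fd_install() pairing on an already-built file (names and error handling are illustrative):

/* sketch: allocate a descriptor, then publish an existing file under it */
static int demo_install_file(struct file *filp)
{
	int fd;

	fd = get_unused_fd();
	if (fd < 0)
		return fd;		/* no free descriptor */

	get_file(filp);			/* fd_install() consumes one reference */
	fd_install(fd, filp);		/* userspace can see the fd from here on */

	return fd;
}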
24919 diff --git a/perfmon/perfmon_fmt.c b/perfmon/perfmon_fmt.c
24920 new file mode 100644
24921 index 0000000..27c4340
24923 +++ b/perfmon/perfmon_fmt.c
24926 + * perfmon_fmt.c: perfmon2 sampling buffer format management
24928 + * This file implements the perfmon2 interface which
24929 + * provides access to the hardware performance counters
24930 + * of the host processor.
24932 + * The initial version of perfmon.c was written by
24933 + * Ganesh Venkitachalam, IBM Corp.
24935 + * Then it was modified for perfmon-1.x by Stephane Eranian and
24936 + * David Mosberger, Hewlett Packard Co.
24938 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
24939 + * by Stephane Eranian, Hewlett Packard Co.
24941 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
24942 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
24943 + * David Mosberger-Tang <davidm@hpl.hp.com>
24945 + * More information about perfmon available at:
24946 + * http://perfmon2.sf.net
24948 + * This program is free software; you can redistribute it and/or
24949 + * modify it under the terms of version 2 of the GNU General Public
24950 + * License as published by the Free Software Foundation.
24952 + * This program is distributed in the hope that it will be useful,
24953 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24954 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24955 + * General Public License for more details.
24957 + * You should have received a copy of the GNU General Public License
24958 + * along with this program; if not, write to the Free Software
24959 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24962 +#include <linux/module.h>
24963 +#include <linux/perfmon_kern.h>
24964 +#include "perfmon_priv.h"
24966 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_smpl_fmt_lock);
24967 +static LIST_HEAD(pfm_smpl_fmt_list);
24969 +static inline int fmt_is_mod(struct pfm_smpl_fmt *f)
24971 + return !(f->fmt_flags & PFM_FMTFL_IS_BUILTIN);
24974 +static struct pfm_smpl_fmt *pfm_find_fmt(char *name)
24976 + struct pfm_smpl_fmt *entry;
24978 + list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) {
24979 + if (!strcmp(entry->fmt_name, name))
24985 + * find a buffer format based on its name
24987 +struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name)
24989 + struct pfm_smpl_fmt *fmt;
24991 + spin_lock(&pfm_smpl_fmt_lock);
24993 + fmt = pfm_find_fmt(name);
24996 + * increase module refcount
24998 + if (fmt && fmt_is_mod(fmt) && !try_module_get(fmt->owner))
25001 + spin_unlock(&pfm_smpl_fmt_lock);
25006 +void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt)
25008 + if (fmt == NULL || !fmt_is_mod(fmt))
25010 + BUG_ON(fmt->owner == NULL);
25012 + spin_lock(&pfm_smpl_fmt_lock);
25013 + module_put(fmt->owner);
25014 + spin_unlock(&pfm_smpl_fmt_lock);
25017 +int pfm_fmt_register(struct pfm_smpl_fmt *fmt)
25021 + if (perfmon_disabled) {
25022 + PFM_INFO("perfmon disabled, cannot add sampling format");
25026 + /* some sanity checks */
25027 + if (fmt == NULL) {
25028 + PFM_INFO("perfmon: NULL format for register");
25032 + if (fmt->fmt_name == NULL) {
25033 + PFM_INFO("perfmon: format has no name");
25037 + if (fmt->fmt_qdepth > PFM_MSGS_COUNT) {
25038 + PFM_INFO("perfmon: format %s requires %u msg queue depth (max %d)",
25046 + * fmt is missing the initialization of .owner = THIS_MODULE
25047 + * this is only valid when format is compiled as a module
25049 + if (fmt->owner == NULL && fmt_is_mod(fmt)) {
25050 + PFM_INFO("format %s has no module owner", fmt->fmt_name);
25054 + * we need at least a handler
25056 + if (fmt->fmt_handler == NULL) {
25057 + PFM_INFO("format %s has no handler", fmt->fmt_name);
25062 + * format argument size cannot be bigger than PAGE_SIZE
25064 + if (fmt->fmt_arg_size > PAGE_SIZE) {
25065 + PFM_INFO("format %s arguments too big", fmt->fmt_name);
25069 + spin_lock(&pfm_smpl_fmt_lock);
25072 + * because of sysfs, we cannot have two formats with the same name
25074 + if (pfm_find_fmt(fmt->fmt_name)) {
25075 + PFM_INFO("format %s already registered", fmt->fmt_name);
25080 + ret = pfm_sysfs_add_fmt(fmt);
25082 + PFM_INFO("sysfs cannot add format entry for %s", fmt->fmt_name);
25086 + list_add(&fmt->fmt_list, &pfm_smpl_fmt_list);
25088 + PFM_INFO("added sampling format %s", fmt->fmt_name);
25090 + spin_unlock(&pfm_smpl_fmt_lock);
25094 +EXPORT_SYMBOL(pfm_fmt_register);
25096 +int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt)
25098 + struct pfm_smpl_fmt *fmt2;
25101 + if (!fmt || !fmt->fmt_name) {
25102 + PFM_DBG("invalid fmt");
25106 + spin_lock(&pfm_smpl_fmt_lock);
25108 + fmt2 = pfm_find_fmt(fmt->fmt_name);
25110 + PFM_INFO("unregister failed, format not registered");
25114 + list_del_init(&fmt->fmt_list);
25116 + pfm_sysfs_remove_fmt(fmt);
25118 + PFM_INFO("removed sampling format: %s", fmt->fmt_name);
25121 + spin_unlock(&pfm_smpl_fmt_lock);
25125 +EXPORT_SYMBOL(pfm_fmt_unregister);
25128 + * we defer adding the builtin formats to /sys/kernel/perfmon/formats
25129 + * until after the pfm sysfs subsystem is initialized. This function
25130 + * is called from pfm_init_sysfs()
25132 +void __init pfm_sysfs_builtin_fmt_add(void)
25134 + struct pfm_smpl_fmt *entry;
25137 + * locking not needed, kernel not fully booted
25140 + list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) {
25141 + pfm_sysfs_add_fmt(entry);
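pfm_fmt_register()/pfm_fmt_unregister() are the plug-in interface for sampling formats. Based on the fields checked above and on the handler call site in perfmon_intr.c ((*fmt_handler)(ctx, ip, now, regs)), a skeletal format module could look roughly like the sketch below; the name, behaviour and the exact handler prototype are illustrative and would need to match perfmon_kern.h:

#include <linux/module.h>
#include <linux/perfmon_kern.h>

/* illustrative handler: consume the overflow without recording anything */
static int demo_fmt_handler(struct pfm_context *ctx, unsigned long ip,
			    u64 tstamp, struct pt_regs *regs)
{
	return 0;	/* non-zero would abort overflow processing */
}

static struct pfm_smpl_fmt demo_fmt = {
	.fmt_name    = "demo-null",
	.fmt_handler = demo_fmt_handler,	/* mandatory */
	.owner       = THIS_MODULE,		/* mandatory for module formats */
};

static int __init demo_fmt_init(void)
{
	return pfm_fmt_register(&demo_fmt);
}

static void __exit demo_fmt_exit(void)
{
	pfm_fmt_unregister(&demo_fmt);
}

module_init(demo_fmt_init);
module_exit(demo_fmt_exit);
MODULE_LICENSE("GPL");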
25144 diff --git a/perfmon/perfmon_hotplug.c b/perfmon/perfmon_hotplug.c
25145 new file mode 100644
25146 index 0000000..eaaba81
25148 +++ b/perfmon/perfmon_hotplug.c
25151 + * perfmon_hotplug.c: handling of CPU hotplug
25153 + * The initial version of perfmon.c was written by
25154 + * Ganesh Venkitachalam, IBM Corp.
25156 + * Then it was modified for perfmon-1.x by Stephane Eranian and
25157 + * David Mosberger, Hewlett Packard Co.
25159 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
25160 + * by Stephane Eranian, Hewlett Packard Co.
25162 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
25163 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
25164 + * David Mosberger-Tang <davidm@hpl.hp.com>
25166 + * More information about perfmon available at:
25167 + * http://perfmon2.sf.net
25169 + * This program is free software; you can redistribute it and/or
25170 + * modify it under the terms of version 2 of the GNU General Public
25171 + * License as published by the Free Software Foundation.
25173 + * This program is distributed in the hope that it will be useful,
25174 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25175 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25176 + * General Public License for more details.
25178 + * You should have received a copy of the GNU General Public License
25179 + * along with this program; if not, write to the Free Software
25180 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25183 +#include <linux/kernel.h>
25184 +#include <linux/perfmon_kern.h>
25185 +#include <linux/cpu.h>
25186 +#include "perfmon_priv.h"
25188 +#ifndef CONFIG_HOTPLUG_CPU
25189 +void pfm_cpu_disable(void)
25192 +int __init pfm_init_hotplug(void)
25196 +#else /* CONFIG_HOTPLUG_CPU */
25198 + * CPU hotplug event notification callback
25200 + * We use the callback to manage the sysfs interface.
25201 + * Note that the actual shutdown of monitoring on the CPU
25202 + * is done in pfm_cpu_disable(), see comments there for more
25205 +static int pfm_cpu_notify(struct notifier_block *nfb,
25206 + unsigned long action, void *hcpu)
25208 + unsigned int cpu = (unsigned long)hcpu;
25209 + int ret = NOTIFY_OK;
25211 + pfm_pmu_conf_get(0);
25213 + switch (action) {
25215 + pfm_debugfs_add_cpu(cpu);
25216 + PFM_INFO("CPU%d is online", cpu);
25218 + case CPU_UP_PREPARE:
25219 + PFM_INFO("CPU%d prepare online", cpu);
25221 + case CPU_UP_CANCELED:
25222 + pfm_debugfs_del_cpu(cpu);
25223 + PFM_INFO("CPU%d is up canceled", cpu);
25225 + case CPU_DOWN_PREPARE:
25226 + PFM_INFO("CPU%d prepare offline", cpu);
25228 + case CPU_DOWN_FAILED:
25229 + PFM_INFO("CPU%d is down failed", cpu);
25232 + pfm_debugfs_del_cpu(cpu);
25233 + PFM_INFO("CPU%d is offline", cpu);
25236 + pfm_pmu_conf_put();
25241 + * called from cpu_disable() to detach the perfmon context
25242 + * from the CPU going down.
25244 + * We cannot use the cpu hotplug notifier because we MUST run
25245 + * on the CPU that is going down to save the PMU state
25247 +void pfm_cpu_disable(void)
25249 + struct pfm_context *ctx;
25250 + unsigned long flags;
25251 + int is_system, release_info = 0;
25255 + ctx = __get_cpu_var(pmu_ctx);
25259 + is_system = ctx->flags.system;
25263 + * context is LOADED or MASKED
25265 + * we unload from CPU. That stops monitoring and does
25266 + * all the bookkeeping of saving values and updating duration
25268 + spin_lock_irqsave(&ctx->lock, flags);
25270 + __pfm_unload_context(ctx, &release_info);
25271 + spin_unlock_irqrestore(&ctx->lock, flags);
25276 + if (release_info & 0x2) {
25277 + r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
25278 + PFM_DBG("timeout cancel=%d", r);
25281 + if (release_info & 0x1)
25282 + pfm_session_release(is_system, cpu);
25285 +static struct notifier_block pfm_cpu_notifier = {
25286 + .notifier_call = pfm_cpu_notify
25289 +int __init pfm_init_hotplug(void)
25293 + * register CPU hotplug event notifier
25295 + ret = register_cpu_notifier(&pfm_cpu_notifier);
25297 + PFM_LOG("CPU hotplug support enabled");
25300 +#endif /* CONFIG_HOTPLUG_CPU */
25301 diff --git a/perfmon/perfmon_init.c b/perfmon/perfmon_init.c
25302 new file mode 100644
25303 index 0000000..bbb6e4d
25305 +++ b/perfmon/perfmon_init.c
25308 + * perfmon.c: perfmon2 global initialization functions
25310 + * This file implements the perfmon2 interface which
25311 + * provides access to the hardware performance counters
25312 + * of the host processor.
25315 + * The initial version of perfmon.c was written by
25316 + * Ganesh Venkitachalam, IBM Corp.
25318 + * Then it was modified for perfmon-1.x by Stephane Eranian and
25319 + * David Mosberger, Hewlett Packard Co.
25321 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
25322 + * by Stephane Eranian, Hewlett Packard Co.
25324 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
25325 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
25326 + * David Mosberger-Tang <davidm@hpl.hp.com>
25328 + * More information about perfmon available at:
25329 + * http://perfmon2.sf.net
25331 + * This program is free software; you can redistribute it and/or
25332 + * modify it under the terms of version 2 of the GNU General Public
25333 + * License as published by the Free Software Foundation.
25335 + * This program is distributed in the hope that it will be useful,
25336 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25337 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25338 + * General Public License for more details.
25340 + * You should have received a copy of the GNU General Public License
25341 + * along with this program; if not, write to the Free Software
25342 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25345 +#include <linux/kernel.h>
25346 +#include <linux/perfmon_kern.h>
25347 +#include "perfmon_priv.h"
25350 + * external variables
25352 +DEFINE_PER_CPU(struct task_struct *, pmu_owner);
25353 +DEFINE_PER_CPU(struct pfm_context *, pmu_ctx);
25354 +DEFINE_PER_CPU(u64, pmu_activation_number);
25355 +DEFINE_PER_CPU(struct pfm_stats, pfm_stats);
25356 +DEFINE_PER_CPU(struct hrtimer, pfm_hrtimer);
25359 +int perfmon_disabled; /* >0 if perfmon is disabled */
25362 + * called from cpu_init() and pfm_pmu_register()
25364 +void __pfm_init_percpu(void *dummy)
25366 + struct hrtimer *h;
25368 + h = &__get_cpu_var(pfm_hrtimer);
25370 + pfm_arch_init_percpu();
25373 + * initialize per-cpu high res timer
25375 + hrtimer_init(h, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
25376 +#ifdef CONFIG_HIGH_RES_TIMERS
25378 + * avoid potential deadlock on the runqueue lock
25379 + * during context switch when multiplexing. Situation
25380 + * arises on architectures which run switch_to() with
25381 + * the runqueue lock held, e.g., x86. On others, e.g.,
25382 + * IA-64, the problem does not exist.
25383 + * We set the callback mode to HRTIMER_CB_IRQSAFE_UNLOCKED
25384 + * so that the callback routine is only called in hardirq
25385 + * context, not in softirq context; the context switch will then not
25386 + * end up trying to wake up the softirqd
25388 + h->cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
25390 + h->function = pfm_handle_switch_timeout;
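The per-CPU timer used for time-based set switching is plain hrtimer usage: initialize once, point .function at a callback that runs in hardirq context, and arm it with a relative expiry. A generic, self-contained sketch of the pattern (period and callback are illustrative, not the perfmon2 ones):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

#define DEMO_PERIOD_NS	(10 * 1000 * 1000)	/* 10ms */

static struct hrtimer demo_timer;

/* hardirq context: keep it short, never sleep */
static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	hrtimer_forward_now(t, ktime_set(0, DEMO_PERIOD_NS));
	return HRTIMER_RESTART;			/* keep firing periodically */
}

static void demo_timer_setup(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	hrtimer_start(&demo_timer, ktime_set(0, DEMO_PERIOD_NS),
		      HRTIMER_MODE_REL);
}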
25394 + * global initialization routine, executed only once
25396 +int __init pfm_init(void)
25398 + PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN);
25400 + if (pfm_init_ctx())
25401 + goto error_disable;
25404 + if (pfm_init_sets())
25405 + goto error_disable;
25407 + if (pfm_init_fs())
25408 + goto error_disable;
25410 + if (pfm_init_sysfs())
25411 + goto error_disable;
25413 + /* not critical, so no error checking */
25414 + pfm_init_debugfs();
25417 + * one time, arch-specific global initialization
25419 + if (pfm_arch_init())
25420 + goto error_disable;
25422 + if (pfm_init_hotplug())
25423 + goto error_disable;
25427 + PFM_ERR("perfmon is disabled due to initialization error");
25428 + perfmon_disabled = 1;
25433 + * must use subsys_initcall() to ensure that the perfmon2 core
25434 + * is initialized before any PMU description module when they are
25437 +subsys_initcall(pfm_init);
25438 diff --git a/perfmon/perfmon_intr.c b/perfmon/perfmon_intr.c
25439 new file mode 100644
25440 index 0000000..c5e3cda
25442 +++ b/perfmon/perfmon_intr.c
25445 + * perfmon_intr.c: perfmon2 interrupt handling
25447 + * This file implements the perfmon2 interface which
25448 + * provides access to the hardware performance counters
25449 + * of the host processor.
25451 + * The initial version of perfmon.c was written by
25452 + * Ganesh Venkitachalam, IBM Corp.
25454 + * Then it was modified for perfmon-1.x by Stephane Eranian and
25455 + * David Mosberger, Hewlett Packard Co.
25457 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
25458 + * by Stephane Eranian, Hewlett Packard Co.
25460 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
25461 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
25462 + * David Mosberger-Tang <davidm@hpl.hp.com>
25464 + * More information about perfmon available at:
25465 + * http://perfmon2.sf.net
25467 + * This program is free software; you can redistribute it and/or
25468 + * modify it under the terms of version 2 of the GNU General Public
25469 + * License as published by the Free Software Foundation.
25471 + * This program is distributed in the hope that it will be useful,
25472 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25473 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25474 + * General Public License for more details.
25476 + * You should have received a copy of the GNU General Public License
25477 + * along with this program; if not, write to the Free Software
25478 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25481 +#include <linux/kernel.h>
25482 +#include <linux/module.h>
25483 +#include <linux/perfmon_kern.h>
25484 +#include "perfmon_priv.h"
25487 + * pfm_intr_process_64bit_ovfls - handle 64-bit counter emulation
25488 + * @ctx: context to operate on
25489 + * @set: set to operate on
25491 + * The function returns the number of 64-bit overflows detected.
25493 + * 64-bit software pmds are updated for overflowed pmd registers
25494 + * and set->reset_pmds is updated with the list of pmds to reset
25496 + * In any case, set->npend_ovfls is cleared
25498 +static u16 pfm_intr_process_64bit_ovfls(struct pfm_context *ctx,
25499 + struct pfm_event_set *set,
25502 + u16 i, num_ovfls, max_pmd, max_intr;
25503 + u16 num_64b_ovfls, has_ovfl_sw, must_switch;
25504 + u64 ovfl_thres, old_val, new_val, ovfl_mask;
25506 + num_64b_ovfls = must_switch = 0;
25508 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
25509 + max_pmd = ctx->regs.max_pmd;
25510 + max_intr = ctx->regs.max_intr_pmd;
25512 + num_ovfls = set->npend_ovfls;
25513 + has_ovfl_sw = set->flags & PFM_SETFL_OVFL_SWITCH;
25515 + bitmap_zero(cast_ulp(set->reset_pmds), max_pmd);
25517 + for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) {
25519 + * skip pmd which did not overflow
25521 + if (!test_bit(i, cast_ulp(set->povfl_pmds)))
25527 + * Update software value for counters ONLY
25529 + * Note that the pmd is not necessarily 0 at this point as
25530 + * qualified events may have happened before the PMU was
25531 + * frozen. The residual count is not taken into consideration
25532 + * here but will be with any read of the pmd
25534 + ovfl_thres = set->pmds[i].ovflsw_thres;
25536 + if (likely(test_bit(i, cast_ulp(ctx->regs.cnt_pmds)))) {
25537 + old_val = new_val = set->pmds[i].value;
25538 + new_val += 1 + ovfl_mask;
25539 + set->pmds[i].value = new_val;
25542 + * for non-counters which interrupt, e.g., AMD IBS,
25543 + * we consider this equivalent to a 64-bit counter
25546 + old_val = 1; new_val = 0;
25550 + * check for 64-bit overflow condition
25552 + if (likely(old_val > new_val)) {
25554 + if (has_ovfl_sw && ovfl_thres > 0) {
25555 + if (ovfl_thres == 1)
25557 + set->pmds[i].ovflsw_thres = ovfl_thres - 1;
25561 + * what to reset because of this overflow
25562 + * - the overflowed register
25563 + * - its reset_smpls
25565 + __set_bit(i, cast_ulp(set->reset_pmds));
25567 + bitmap_or(cast_ulp(set->reset_pmds),
25568 + cast_ulp(set->reset_pmds),
25569 + cast_ulp(set->pmds[i].reset_pmds),
25573 + * only keep track of 64-bit overflows or
25576 + __clear_bit(i, cast_ulp(set->povfl_pmds));
25579 + * on some PMU, it may be necessary to re-arm the PMD
25581 + pfm_arch_ovfl_reset_pmd(ctx, i);
25584 + PFM_DBG_ovfl("ovfl=%s pmd%u new=0x%llx old=0x%llx "
25585 + "hw_pmd=0x%llx o_pmds=0x%llx must_switch=%u "
25586 + "o_thres=%llu o_thres_ref=%llu",
25587 + old_val > new_val ? "64-bit" : "HW",
25589 + (unsigned long long)new_val,
25590 + (unsigned long long)old_val,
25591 + (unsigned long long)pfm_read_pmd(ctx, i),
25592 + (unsigned long long)set->povfl_pmds[0],
25594 + (unsigned long long)set->pmds[i].ovflsw_thres,
25595 + (unsigned long long)set->pmds[i].ovflsw_ref_thres);
25598 + * update public bitmask of 64-bit overflowed pmds
25600 + if (num_64b_ovfls)
25601 + bitmap_copy(cast_ulp(set->ovfl_pmds), cast_ulp(set->povfl_pmds),
25605 + *ovfl_ctrl |= PFM_OVFL_CTRL_SWITCH;
25608 + * mark the overflows as consumed
25610 + set->npend_ovfls = 0;
25611 + bitmap_zero(cast_ulp(set->povfl_pmds), max_intr);
25613 + return num_64b_ovfls;
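The arithmetic above is the classic narrow-counter widening trick: the software value holds the full 64-bit count, each hardware overflow adds one full counter period (ovfl_mask + 1), and a wrap of the 64-bit software value itself (old > new) is what counts as a 64-bit overflow. A tiny standalone sketch of the same arithmetic, assuming a 48-bit hardware counter:

#include <stdint.h>

#define CNT_WIDTH	48
#define OVFL_MASK	((UINT64_C(1) << CNT_WIDTH) - 1)

/* called once per hardware overflow interrupt for this counter */
static int account_hw_overflow(uint64_t *sw_value)
{
	uint64_t old_val = *sw_value;
	uint64_t new_val = old_val + OVFL_MASK + 1;	/* one full HW period */

	*sw_value = new_val;

	/* the emulated 64-bit counter wrapped iff the addition overflowed */
	return old_val > new_val;
}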
25617 + * pfm_intr_get_smpl_pmds_values - copy 64-bit pmd values for sampling format
25618 + * @ctx: context to work on
25619 + * @set: current event set
25620 + * @arg: overflow arg to be passed to format
25621 + * @smpl_pmds: list of PMDs of interest for the overflowed register
25623 + * build an array of 64-bit PMD values based on smpl_pmds. Values are
25624 + * stored in increasing order of the PMD indexes
25626 +static void pfm_intr_get_smpl_pmds_values(struct pfm_context *ctx,
25627 + struct pfm_event_set *set,
25628 + struct pfm_ovfl_arg *arg,
25631 + u16 j, k, max_pmd;
25632 + u64 new_val, ovfl_mask;
25635 + cnt_pmds = ctx->regs.cnt_pmds;
25636 + max_pmd = ctx->regs.max_pmd;
25637 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
25639 + for (j = k = 0; j < max_pmd; j++) {
25641 + if (!test_bit(j, cast_ulp(smpl_pmds)))
25644 + new_val = pfm_read_pmd(ctx, j);
25646 + /* for counters, build 64-bit value */
25647 + if (test_bit(j, cast_ulp(cnt_pmds)))
25648 + new_val = (set->pmds[j].value & ~ovfl_mask)
25649 + | (new_val & ovfl_mask);
25651 + arg->smpl_pmds_values[k++] = new_val;
25653 + PFM_DBG_ovfl("s_pmd_val[%u]=pmd%u=0x%llx", k, j,
25654 + (unsigned long long)new_val);
25656 + arg->num_smpl_pmds = k;
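pfm_intr_get_smpl_pmds_values() is the read-side counterpart: the live 64-bit value of a counting PMD is rebuilt from the high bits of the 64-bit software value and the low bits of the current hardware register. The merge, written out as a standalone helper:

#include <stdint.h>

/* sw_value: 64-bit software accumulator, hw_value: raw register read */
static inline uint64_t full_count(uint64_t sw_value, uint64_t hw_value,
				  uint64_t ovfl_mask)
{
	return (sw_value & ~ovfl_mask) | (hw_value & ovfl_mask);
}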
25660 + * pfm_intr_process_smpl_fmt -- handle sampling format callback
25661 + * @ctx: context to work on
25662 + * @set: current event set
25663 + * @ip: interrupted instruction pointer
25664 + * @now: timestamp
25665 + * @num_ovfls: number of 64-bit overflows
25666 + * @ovfl_ctrl: set of controls for interrupt handler tail processing
25667 + * @regs: register state
25669 + * Prepare argument (ovfl_arg) to be passed to sampling format callback, then
25670 + * invoke the callback (fmt_handler)
25672 +static int pfm_intr_process_smpl_fmt(struct pfm_context *ctx,
25673 + struct pfm_event_set *set,
25674 + unsigned long ip,
25678 + struct pt_regs *regs)
25680 + struct pfm_ovfl_arg *ovfl_arg;
25681 + u64 start_cycles, end_cycles;
25685 + ovfl_arg = &ctx->ovfl_arg;
25687 + ovfl_arg->active_set = set->id;
25688 + max_pmd = ctx->regs.max_pmd;
25691 + * first_intr_pmd: first PMD which can generate PMU interrupts
25693 + for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) {
25695 + * skip pmd which did not have 64-bit overflows
25697 + if (!test_bit(i, cast_ulp(set->ovfl_pmds)))
25703 + * prepare argument to fmt_handler
25705 + ovfl_arg->ovfl_pmd = i;
25706 + ovfl_arg->ovfl_ctrl = 0;
25708 + ovfl_arg->pmd_last_reset = set->pmds[i].lval;
25709 + ovfl_arg->pmd_eventid = set->pmds[i].eventid;
25710 + ovfl_arg->num_smpl_pmds = 0;
25713 + * copy values of pmds of interest, if any
25714 + * Sampling format may use them
25715 + * We do not initialize the unused smpl_pmds_values
25717 + if (!bitmap_empty(cast_ulp(set->pmds[i].smpl_pmds), max_pmd))
25718 + pfm_intr_get_smpl_pmds_values(ctx, set, ovfl_arg,
25719 + set->pmds[i].smpl_pmds);
25721 + pfm_stats_inc(fmt_handler_calls);
25724 + * call format record (handler) routine
25726 + start_cycles = sched_clock();
25727 + ret = (*ctx->smpl_fmt->fmt_handler)(ctx, ip, now, regs);
25728 + end_cycles = sched_clock();
25731 + * The reset_pmds mask is constructed automatically
25732 + * on overflow. When the actual reset takes place
25733 + * depends on the masking, switch and notification
25734 + * status. It may be deferred until pfm_restart().
25736 + *ovfl_ctrl |= ovfl_arg->ovfl_ctrl;
25738 + pfm_stats_add(fmt_handler_ns, end_cycles - start_cycles);
25741 + * when the format cannot handle the rest of the overflow, we abort
25744 + PFM_DBG_ovfl("handler aborted at PMD%u ret=%d", i, ret);
25748 + * pfm_overflow_handler - main overflow processing routine.
25749 + * @ctx: context to work on (always current context)
25750 + * @set: current event set
25751 + * @ip: interrupt instruction pointer
25752 + * @regs: machine state
25754 + * set->num_ovfl_pmds is 0 when returning from this function even though
25755 + * set->ovfl_pmds[] may have bits set. When leaving set->num_ovfl_pmds
25756 + * must never be used to determine if there was a pending overflow.
25758 +static void pfm_overflow_handler(struct pfm_context *ctx,
25759 + struct pfm_event_set *set,
25760 + unsigned long ip,
25761 + struct pt_regs *regs)
25763 + struct pfm_event_set *set_orig;
25766 + u16 max_intr, max_pmd;
25768 + int ret, has_notify;
25773 + now = sched_clock();
25775 + max_pmd = ctx->regs.max_pmd;
25776 + max_intr = ctx->regs.max_intr_pmd;
25782 + * skip ZOMBIE case
25784 + if (unlikely(ctx->state == PFM_CTX_ZOMBIE))
25785 + goto stop_monitoring;
25787 + PFM_DBG_ovfl("intr_pmds=0x%llx npend=%u ip=%p, blocking=%d "
25788 + "u_pmds=0x%llx use_fmt=%u",
25789 + (unsigned long long)set->povfl_pmds[0],
25790 + set->npend_ovfls,
25792 + ctx->flags.block,
25793 + (unsigned long long)set->used_pmds[0],
25794 + !!ctx->smpl_fmt);
25797 + * return number of 64-bit overflows
25799 + num_ovfls = pfm_intr_process_64bit_ovfls(ctx, set, &ovfl_ctrl);
25802 + * there were no 64-bit overflows
25803 + * nothing else to do
25809 + * tmp_ovfl_notify = ovfl_pmds & ovfl_notify
25811 + * - ovfl_pmds: last 64-bit overflowed pmds
25812 + * - ovfl_notify: notify on overflow registers
25814 + bitmap_and(cast_ulp(ctx->tmp_ovfl_notify),
25815 + cast_ulp(set->ovfl_pmds),
25816 + cast_ulp(set->ovfl_notify),
25819 + has_notify = !bitmap_empty(cast_ulp(ctx->tmp_ovfl_notify), max_intr);
25822 + * check for sampling format and invoke fmt_handler
25824 + if (likely(ctx->smpl_fmt)) {
25825 + pfm_intr_process_smpl_fmt(ctx, set, ip, now, num_ovfls,
25826 + &ovfl_ctrl, regs);
25829 + * When no sampling format is used, the default
25831 + * - mask monitoring if not switching
25832 + * - notify user if requested
25834 + * If notification is not requested, monitoring is masked
25835 + * and overflowed registers are not reset (saturation).
25836 + * This mimics the behavior of the default sampling format.
25838 + ovfl_ctrl |= PFM_OVFL_CTRL_NOTIFY;
25839 + if (has_notify || !(ovfl_ctrl & PFM_OVFL_CTRL_SWITCH))
25840 + ovfl_ctrl |= PFM_OVFL_CTRL_MASK;
25843 + PFM_DBG_ovfl("set%u o_notify=0x%llx o_pmds=0x%llx "
25844 + "r_pmds=0x%llx ovfl_ctrl=0x%x",
25846 + (unsigned long long)ctx->tmp_ovfl_notify[0],
25847 + (unsigned long long)set->ovfl_pmds[0],
25848 + (unsigned long long)set->reset_pmds[0],
25852 + * execute the various controls
25858 + * mask monitoring
25860 + if (ovfl_ctrl & PFM_OVFL_CTRL_MASK) {
25861 + pfm_mask_monitoring(ctx, set);
25863 + * when masking, reset is deferred until
25866 + ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET;
25869 + * when masking, switching is deferred until
25870 + * pfm_restart and we need to remember it
25872 + if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) {
25873 + set->priv_flags |= PFM_SETFL_PRIV_SWITCH;
25874 + ovfl_ctrl &= ~PFM_OVFL_CTRL_SWITCH;
25879 + * switch event set
25881 + if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) {
25882 + pfm_switch_sets_from_intr(ctx);
25883 + /* update view of active set */
25884 + set = ctx->active_set;
25887 + * send overflow notification
25889 + * only necessary if at least one overflowed
25890 + * register had the notify flag set
25892 + if (has_notify && (ovfl_ctrl & PFM_OVFL_CTRL_NOTIFY)) {
25894 + * block on notify, not on masking
25896 + if (ctx->flags.block)
25897 + pfm_post_work(current, ctx, PFM_WORK_BLOCK);
25900 + * send notification and pass the original set id.
25901 + * If there is an error (queue full, for instance), default
25902 + * to masking monitoring, i.e., saturate
25904 + ret = pfm_ovfl_notify(ctx, set_orig, ip);
25905 + if (unlikely(ret)) {
25906 + if (ctx->state == PFM_CTX_LOADED) {
25907 + pfm_mask_monitoring(ctx, set);
25908 + ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET;
25911 + ctx->flags.can_restart++;
25912 + PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart);
25917 + * reset overflowed registers
25919 + if (ovfl_ctrl & PFM_OVFL_CTRL_RESET) {
25921 + nn = bitmap_weight(cast_ulp(set->reset_pmds), max_pmd);
25923 + pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_SHORT);
25929 + * Does not happen for a system-wide context nor for a
25930 + * self-monitored context. We cannot attach to a kernel-only
25931 + * thread, thus it is safe to set TIF bits, i.e., the thread
25932 + * will eventually leave the kernel or die and either we will
25933 + * catch the context and clean it up in pfm_handle_work() or
25934 + * pfm_exit_thread().
25936 + * Mask until we get to pfm_handle_work()
25938 + pfm_mask_monitoring(ctx, set);
25940 + PFM_DBG_ovfl("ctx is zombie, converted to spurious");
25941 + pfm_post_work(current, ctx, PFM_WORK_ZOMBIE);
25945 + * __pfm_interrupt_handler - 1st level interrupt handler
25946 + * @ip: interrupted instruction pointer
25947 + * @regs: machine state
25949 + * Function is static because we use a wrapper to easily capture timing information.
25952 + * Context locking necessary to avoid concurrent accesses from other CPUs
25953 + * - For per-thread, we must prevent pfm_restart() which works when
25954 + * context is LOADED or MASKED
25956 +static void __pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs)
25958 + struct task_struct *task;
25959 + struct pfm_context *ctx;
25960 + struct pfm_event_set *set;
25963 + task = __get_cpu_var(pmu_owner);
25964 + ctx = __get_cpu_var(pmu_ctx);
25967 + * verify if there is a context on this CPU
25969 + if (unlikely(ctx == NULL)) {
25970 + PFM_DBG_ovfl("no ctx");
25975 + * we need to lock context because it could be accessed
25976 + * from another CPU. Depending on the priority level of
25977 + * the PMU interrupt or the arch, it may be necessary to
25978 + * mask interrupts altogether to avoid a race condition with
25979 + * the timer interrupt in case of time-based set switching,
25982 + spin_lock(&ctx->lock);
25984 + set = ctx->active_set;
25987 + * For SMP per-thread, it is not possible to have
25988 + * owner != NULL && task != current.
25990 + * For UP per-thread, because of lazy save, it
25991 + * is possible to receive an interrupt in another task
25992 + * which is not using the PMU. This means
25993 + * that the interrupt was in-flight at the
25994 + * time of pfm_ctxswout_thread(). In that
25995 + * case, it will be replayed when the task
25996 + * is scheduled again. Hence we convert to spurious.
25998 + * The basic rule is that an overflow is always
25999 + * processed in the context of the task that
26000 + * generated it for all per-thread contexts.
26002 + * for system-wide, task is always NULL
26004 +#ifndef CONFIG_SMP
26005 + if (unlikely((task && current->pfm_context != ctx))) {
26006 + PFM_DBG_ovfl("spurious: not owned by current task");
26010 + if (unlikely(ctx->state == PFM_CTX_MASKED)) {
26011 + PFM_DBG_ovfl("spurious: monitoring masked");
26016 + * check that monitoring is active, otherwise convert
26019 + if (unlikely(!pfm_arch_is_active(ctx))) {
26020 + PFM_DBG_ovfl("spurious: monitoring non active");
26025 + * freeze PMU and collect overflowed PMD registers
26026 + * into set->povfl_pmds. Number of overflowed PMDs
26027 + * reported in set->npend_ovfls
26029 + pfm_arch_intr_freeze_pmu(ctx, set);
26032 + * no overflow detected, interrupt may have come
26033 + * from the previous thread running on this CPU
26035 + if (unlikely(!set->npend_ovfls)) {
26036 + PFM_DBG_ovfl("no npend_ovfls");
26040 + pfm_stats_inc(ovfl_intr_regular_count);
26043 + * invoke actual handler
26045 + pfm_overflow_handler(ctx, set, ip, regs);
26048 + * unfreeze PMU, monitoring may not actually be restarted
26049 + * if context is MASKED
26051 + pfm_arch_intr_unfreeze_pmu(ctx);
26053 + spin_unlock(&ctx->lock);
26058 + /* ctx may be NULL */
26059 + pfm_arch_intr_unfreeze_pmu(ctx);
26061 + spin_unlock(&ctx->lock);
26063 + pfm_stats_inc(ovfl_intr_spurious_count);
26068 + * pfm_interrupt_handler - 1st level interrupt handler
26069 + * @ip: interrupt instruction pointer
26070 + * @regs: machine state
26072 + * Function called from the low-level assembly code or arch-specific perfmon
26073 + * code. Simple wrapper used for timing purposes. Actual work done in
26074 + * __pfm_interrupt_handler()
26076 +void pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs)
26080 + pfm_stats_inc(ovfl_intr_all_count);
26082 + BUG_ON(!irqs_disabled());
26084 + start = sched_clock();
26086 + __pfm_interrupt_handler(ip, regs);
26088 + pfm_stats_add(ovfl_intr_ns, sched_clock() - start);
26090 +EXPORT_SYMBOL(pfm_interrupt_handler);
26092 diff --git a/perfmon/perfmon_msg.c b/perfmon/perfmon_msg.c
26093 new file mode 100644
26094 index 0000000..b8a1e4c
26096 +++ b/perfmon/perfmon_msg.c
26099 + * perfmon_msg.c: perfmon2 notification message queue management
26101 + * This file implements the perfmon2 interface which
26102 + * provides access to the hardware performance counters
26103 + * of the host processor.
26105 + * The initial version of perfmon.c was written by
26106 + * Ganesh Venkitachalam, IBM Corp.
26108 + * Then it was modified for perfmon-1.x by Stephane Eranian and
26109 + * David Mosberger, Hewlett Packard Co.
26111 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
26112 + * by Stephane Eranian, Hewlett Packard Co.
26114 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
26115 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
26116 + * David Mosberger-Tang <davidm@hpl.hp.com>
26118 + * More information about perfmon available at:
26119 + * http://perfmon2.sf.net
26121 + * This program is free software; you can redistribute it and/or
26122 + * modify it under the terms of version 2 of the GNU General Public
26123 + * License as published by the Free Software Foundation.
26125 + * This program is distributed in the hope that it will be useful,
26126 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26127 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26128 + * General Public License for more details.
26130 + * You should have received a copy of the GNU General Public License
26131 + * along with this program; if not, write to the Free Software
26132 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26135 +#include <linux/kernel.h>
26136 +#include <linux/poll.h>
26137 +#include <linux/perfmon_kern.h>
26140 + * pfm_get_new_msg - get a new message slot from the queue
26141 + * @ctx: context to operate on
26143 + * if the queue is full, NULL is returned
26145 +static union pfarg_msg *pfm_get_new_msg(struct pfm_context *ctx)
26149 + next = ctx->msgq_head & PFM_MSGQ_MASK;
26151 + if ((ctx->msgq_head - ctx->msgq_tail) == PFM_MSGS_COUNT)
26155 + * move to next possible slot
26157 + ctx->msgq_head++;
26159 + PFM_DBG_ovfl("head=%d tail=%d msg=%d",
26160 + ctx->msgq_head & PFM_MSGQ_MASK,
26161 + ctx->msgq_tail & PFM_MSGQ_MASK,
26164 + return ctx->msgq+next;
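The notification queue uses free-running head/tail counters instead of wrapped indexes: a slot index is obtained by masking with PFM_MSGQ_MASK (so the depth must be a power of two), the queue is full when head - tail equals the depth, and empty when head == tail (see pfm_msgq_is_empty() in perfmon_priv.h). A small standalone sketch of that indexing scheme, with illustrative names and an 8-entry queue:

#define DEMO_QDEPTH	8			/* must be a power of two */
#define DEMO_QMASK	(DEMO_QDEPTH - 1)

struct demo_queue {
	unsigned int head;			/* free-running producer count */
	unsigned int tail;			/* free-running consumer count */
	int slot[DEMO_QDEPTH];
};

static int *demo_enqueue_slot(struct demo_queue *q)
{
	if (q->head - q->tail == DEMO_QDEPTH)	/* full */
		return NULL;
	return &q->slot[q->head++ & DEMO_QMASK];
}

static int *demo_dequeue_slot(struct demo_queue *q)
{
	if (q->head == q->tail)			/* empty */
		return NULL;
	return &q->slot[q->tail++ & DEMO_QMASK];
}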
26168 + * pfm_notify_user - wake up any thread waiting on msg queue, post SIGIO
26169 + * @ctx: context to operate on
26171 + * message is already enqueued
26173 +static void pfm_notify_user(struct pfm_context *ctx)
26175 + if (ctx->state == PFM_CTX_ZOMBIE) {
26176 + PFM_DBG("no notification, context is zombie");
26180 + PFM_DBG_ovfl("waking up");
26182 + wake_up_interruptible(&ctx->msgq_wait);
26185 + * it is safe to call kill_fasync() from an interrupt
26186 + * handler. kill_fasync() grabs two RW locks (fasync_lock,
26187 + * tasklist_lock) in read mode. There is conflict only in
26188 + * case the PMU interrupt occurs during a write mode critical
26189 + * section. This cannot happen because for both locks, the
26190 + * write mode is always using interrupt masking (write_lock_irq).
26192 + kill_fasync(&ctx->async_queue, SIGIO, POLL_IN);
26196 + * pfm_ovfl_notify - send overflow notification
26197 + * @ctx: context to operate on
26198 + * @set: which set the overflow comes from
26199 + * @ip: overflow interrupt instruction address (IIP)
26201 + * Appends an overflow notification message to context queue.
26202 + * call pfm_notify_user() to wake up any threads and/or send a signal
26204 + * Context is locked and interrupts are disabled (no preemption).
26206 +int pfm_ovfl_notify(struct pfm_context *ctx,
26207 + struct pfm_event_set *set,
26208 + unsigned long ip)
26210 + union pfarg_msg *msg = NULL;
26213 + if (!ctx->flags.no_msg) {
26214 + msg = pfm_get_new_msg(ctx);
26215 + if (msg == NULL) {
26217 + * when the message queue fills up, it is because the user
26218 + * did not extract the messages yet still issued
26219 + * pfm_restart(). At this point, we stop sending
26220 + * notifications, thus the user will not be able to get
26221 + * new samples when using the default format.
26223 + PFM_DBG_ovfl("no more notification msgs");
26227 + msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL;
26228 + msg->pfm_ovfl_msg.msg_ovfl_pid = current->pid;
26229 + msg->pfm_ovfl_msg.msg_active_set = set->id;
26231 + ovfl_pmds = msg->pfm_ovfl_msg.msg_ovfl_pmds;
26234 + * copy bitmask of all pmd that interrupted last
26236 + bitmap_copy(cast_ulp(ovfl_pmds), cast_ulp(set->ovfl_pmds),
26237 + ctx->regs.max_intr_pmd);
26239 + msg->pfm_ovfl_msg.msg_ovfl_cpu = smp_processor_id();
26240 + msg->pfm_ovfl_msg.msg_ovfl_tid = current->tgid;
26241 + msg->pfm_ovfl_msg.msg_ovfl_ip = ip;
26243 + pfm_stats_inc(ovfl_notify_count);
26246 + PFM_DBG_ovfl("ip=0x%lx o_pmds=0x%llx",
26248 + (unsigned long long)set->ovfl_pmds[0]);
26250 + pfm_notify_user(ctx);
26255 + * pfm_end_notify_user - notify of thread termination
26256 + * @ctx: context to operate on
26258 + * In per-thread mode, when not self-monitoring, perfmon
26259 + * sends an 'end' notification message when the monitored
26260 + * thread where the context is attached is exiting.
26262 + * This helper message alleviates the need to track the activity
26263 + * of the thread/process when it is not directly related, i.e.,
26264 + * was attached. In other words, there is no need to keep the thread
26267 + * The context must be locked and interrupts disabled.
26269 +int pfm_end_notify(struct pfm_context *ctx)
26271 + union pfarg_msg *msg;
26273 + msg = pfm_get_new_msg(ctx);
26274 + if (msg == NULL) {
26275 + PFM_ERR("%s no more msgs", __func__);
26279 + memset(msg, 0, sizeof(*msg));
26281 + msg->type = PFM_MSG_END;
26283 + PFM_DBG("end msg: msg=%p no_msg=%d",
26285 + ctx->flags.no_msg);
26287 + pfm_notify_user(ctx);
26292 + * pfm_get_next_msg - copy the oldest message from the queue and move tail
26293 + * @ctx: context to use
26294 + * @m: where to copy the message into
26296 + * The tail of the queue is moved as a consequence of this call
26298 +void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m)
26300 + union pfarg_msg *next;
26302 + PFM_DBG_ovfl("in head=%d tail=%d",
26303 + ctx->msgq_head & PFM_MSGQ_MASK,
26304 + ctx->msgq_tail & PFM_MSGQ_MASK);
26307 + * get oldest message
26309 + next = ctx->msgq + (ctx->msgq_tail & PFM_MSGQ_MASK);
26312 + * move tail forward
26314 + ctx->msgq_tail++;
26317 + * copy message, we cannot simply point to it
26318 + * as it may be re-used before we copy it out
26322 + PFM_DBG_ovfl("out head=%d tail=%d type=%d",
26323 + ctx->msgq_head & PFM_MSGQ_MASK,
26324 + ctx->msgq_tail & PFM_MSGQ_MASK,
26327 diff --git a/perfmon/perfmon_pmu.c b/perfmon/perfmon_pmu.c
26328 new file mode 100644
26329 index 0000000..df7a9c9
26331 +++ b/perfmon/perfmon_pmu.c
26334 + * perfmon_pmu.c: perfmon2 PMU configuration management
26336 + * This file implements the perfmon2 interface which
26337 + * provides access to the hardware performance counters
26338 + * of the host processor.
26340 + * The initial version of perfmon.c was written by
26341 + * Ganesh Venkitachalam, IBM Corp.
26343 + * Then it was modified for perfmon-1.x by Stephane Eranian and
26344 + * David Mosberger, Hewlett Packard Co.
26346 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
26347 + * by Stephane Eranian, Hewlett Packard Co.
26349 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
26350 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
26351 + * David Mosberger-Tang <davidm@hpl.hp.com>
26353 + * More information about perfmon available at:
26354 + * http://perfmon2.sf.net
26356 + * This program is free software; you can redistribute it and/or
26357 + * modify it under the terms of version 2 of the GNU General Public
26358 + * License as published by the Free Software Foundation.
26360 + * This program is distributed in the hope that it will be useful,
26361 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26362 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26363 + * General Public License for more details.
26365 + * You should have received a copy of the GNU General Public License
26366 + * along with this program; if not, write to the Free Software
26367 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26370 +#include <linux/module.h>
26371 +#include <linux/perfmon_kern.h>
26372 +#include "perfmon_priv.h"
26374 +#ifndef CONFIG_MODULE_UNLOAD
26375 +#define module_refcount(n) 1
26378 +static __cacheline_aligned_in_smp int request_mod_in_progress;
26379 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_conf_lock);
26381 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_acq_lock);
26382 +static u32 pfm_pmu_acquired;
26385 + * perfmon core must access PMU information ONLY through pfm_pmu_conf
26386 + * if pfm_pmu_conf is NULL, then no description is registered
26388 +struct pfm_pmu_config *pfm_pmu_conf;
26389 +EXPORT_SYMBOL(pfm_pmu_conf);
26391 +static inline int pmu_is_module(struct pfm_pmu_config *c)
26393 + return !(c->flags & PFM_PMUFL_IS_BUILTIN);
26396 + * pfm_pmu_regdesc_init -- initialize regdesc structure from PMU table
26397 + * @regs: the regdesc structure to initialize
26398 + * @excl_type: the register type(s) to exclude from this regdesc
26399 + * @unavail_pmcs: unavailable PMC registers
26400 + * @unavail_pmds: unavailable PMD registers
26404 + * errno in case of error
26406 +static int pfm_pmu_regdesc_init(struct pfm_regdesc *regs, int excl_type,
26407 + u64 *unavail_pmcs, u64 *unavail_pmds)
26409 + struct pfm_regmap_desc *d;
26410 + u16 n, n2, n_counters, i;
26411 + int first_intr_pmd = -1, max1, max2, max3;
26414 + * compute the number of implemented PMC from the
26415 + * description table
26418 + max1 = max2 = -1;
26419 + d = pfm_pmu_conf->pmc_desc;
26420 + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
26421 + if (!(d->type & PFM_REG_I))
26424 + if (test_bit(i, cast_ulp(unavail_pmcs)))
26427 + if (d->type & excl_type)
26430 + __set_bit(i, cast_ulp(regs->pmcs));
26437 + PFM_INFO("%s PMU description has no PMC registers",
26438 + pfm_pmu_conf->pmu_name);
26442 + regs->max_pmc = max1 + 1;
26443 + regs->num_pmcs = n;
26445 + n = n_counters = n2 = 0;
26446 + max1 = max2 = max3 = -1;
26447 + d = pfm_pmu_conf->pmd_desc;
26448 + for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) {
26449 + if (!(d->type & PFM_REG_I))
26452 + if (test_bit(i, cast_ulp(unavail_pmds)))
26455 + if (d->type & excl_type)
26458 + __set_bit(i, cast_ulp(regs->pmds));
26463 + * read-write registers
26465 + if (!(d->type & PFM_REG_RO)) {
26466 + __set_bit(i, cast_ulp(regs->rw_pmds));
26472 + * counter registers
26474 + if (d->type & PFM_REG_C64) {
26475 + __set_bit(i, cast_ulp(regs->cnt_pmds));
26480 + * PMD with intr capabilities
26482 + if (d->type & PFM_REG_INTR) {
26483 + __set_bit(i, cast_ulp(regs->intr_pmds));
26484 + if (first_intr_pmd == -1)
26485 + first_intr_pmd = i;
26491 + PFM_INFO("%s PMU description has no PMD registers",
26492 + pfm_pmu_conf->pmu_name);
26496 + regs->max_pmd = max1 + 1;
26497 + regs->first_intr_pmd = first_intr_pmd;
26498 + regs->max_intr_pmd = max2 + 1;
26500 + regs->num_counters = n_counters;
26501 + regs->num_pmds = n;
26502 + regs->max_rw_pmd = max3 + 1;
26503 + regs->num_rw_pmd = n2;
26509 + * pfm_pmu_regdesc_init_all -- initialize all regdesc structures
26510 + * @una_pmcs: unavailable PMC registers
26511 + * @una_pmds: unavailable PMD registers
26517 + * We maintain 3 regdesc:
26518 + * regs_all: all available registers
26519 + * regs_sys: registers available to system-wide contexts only
26520 + * regs_thr: registers available to per-thread contexts only
26522 +static int pfm_pmu_regdesc_init_all(u64 *una_pmcs, u64 *una_pmds)
26526 + memset(&pfm_pmu_conf->regs_all, 0, sizeof(struct pfm_regdesc));
26527 + memset(&pfm_pmu_conf->regs_thr, 0, sizeof(struct pfm_regdesc));
26528 + memset(&pfm_pmu_conf->regs_sys, 0, sizeof(struct pfm_regdesc));
26530 + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_all,
26532 + una_pmcs, una_pmds);
26536 + PFM_DBG("regs_all.pmcs=0x%llx",
26537 + (unsigned long long)pfm_pmu_conf->regs_all.pmcs[0]);
26539 + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_thr,
26541 + una_pmcs, una_pmds);
26544 + PFM_DBG("regs.thr.pmcs=0x%llx",
26545 + (unsigned long long)pfm_pmu_conf->regs_thr.pmcs[0]);
26547 + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_sys,
26549 + una_pmcs, una_pmds);
26551 + PFM_DBG("regs_sys.pmcs=0x%llx",
26552 + (unsigned long long)pfm_pmu_conf->regs_sys.pmcs[0]);
26557 +int pfm_pmu_register(struct pfm_pmu_config *cfg)
26559 + u16 i, nspec, nspec_ro, num_pmcs, num_pmds, num_wc = 0;
26560 + int type, ret = -EBUSY;
26562 + if (perfmon_disabled) {
26563 + PFM_INFO("perfmon disabled, cannot add PMU description");
26567 + nspec = nspec_ro = num_pmds = num_pmcs = 0;
26569 + /* some sanity checks */
26570 + if (cfg == NULL || cfg->pmu_name == NULL) {
26571 + PFM_INFO("PMU config descriptor is invalid");
26575 + /* must have a probe */
26576 + if (cfg->probe_pmu == NULL) {
26577 + PFM_INFO("PMU config has no probe routine");
26582 + * execute probe routine before anything else as it
26583 + * may update configuration tables
26585 + if ((*cfg->probe_pmu)() == -1) {
26586 + PFM_INFO("%s PMU detection failed", cfg->pmu_name);
26590 + if (!(cfg->flags & PFM_PMUFL_IS_BUILTIN) && cfg->owner == NULL) {
26591 + PFM_INFO("PMU config %s is missing owner", cfg->pmu_name);
26595 + if (!cfg->num_pmd_entries) {
26596 + PFM_INFO("%s needs to define num_pmd_entries", cfg->pmu_name);
26600 + if (!cfg->num_pmc_entries) {
26601 + PFM_INFO("%s needs to define num_pmc_entries", cfg->pmu_name);
26605 + if (!cfg->counter_width) {
26606 + PFM_INFO("PMU config %s, zero width counters", cfg->pmu_name);
26611 + * REG_RO, REG_V not supported on PMC registers
26613 + for (i = 0; i < cfg->num_pmc_entries; i++) {
26615 + type = cfg->pmc_desc[i].type;
26617 + if (type & PFM_REG_I)
26620 + if (type & PFM_REG_WC)
26623 + if (type & PFM_REG_V) {
26624 + PFM_INFO("PFM_REG_V is not supported on "
26625 + "PMCs (PMC%d)", i);
26628 + if (type & PFM_REG_RO) {
26629 + PFM_INFO("PFM_REG_RO meaningless on "
26630 + "PMCs (PMC%u)", i);
26635 + if (num_wc && cfg->pmc_write_check == NULL) {
26636 + PFM_INFO("some PMCs have write-checker but no callback provided\n");
26641 + * check virtual PMD registers
26644 + for (i = 0; i < cfg->num_pmd_entries; i++) {
26646 + type = cfg->pmd_desc[i].type;
26648 + if (type & PFM_REG_I)
26651 + if (type & PFM_REG_V) {
26653 + if (type & PFM_REG_RO)
26657 + if (type & PFM_REG_WC)
26661 + if (num_wc && cfg->pmd_write_check == NULL) {
26662 + PFM_INFO("some PMDs have write-checker but no callback provided\n");
26666 + if (nspec && cfg->pmd_sread == NULL) {
26667 + PFM_INFO("PMU config is missing pmd_sread()");
26671 + nspec = nspec - nspec_ro;
26672 + if (nspec && cfg->pmd_swrite == NULL) {
26673 + PFM_INFO("PMU config is missing pmd_swrite()");
26677 + if (num_pmcs >= PFM_MAX_PMCS) {
26678 + PFM_INFO("%s PMCS registers exceed name space [0-%u]",
26683 + if (num_pmds >= PFM_MAX_PMDS) {
26684 + PFM_INFO("%s PMDS registers exceed name space [0-%u]",
26689 + spin_lock(&pfm_pmu_conf_lock);
26691 + if (pfm_pmu_conf)
26694 + if (!cfg->version)
26695 + cfg->version = "0.0";
26697 + pfm_pmu_conf = cfg;
26698 + pfm_pmu_conf->ovfl_mask = (1ULL << cfg->counter_width) - 1;
26700 + ret = pfm_arch_pmu_config_init(cfg);
26704 + ret = pfm_sysfs_add_pmu(pfm_pmu_conf);
26706 + pfm_pmu_conf = NULL;
26709 + spin_unlock(&pfm_pmu_conf_lock);
26712 + PFM_INFO("register %s PMU error %d", cfg->pmu_name, ret);
26714 + PFM_INFO("%s PMU installed", cfg->pmu_name);
26716 + * (re)initialize PMU on each PMU now that we have a description
26718 + on_each_cpu(__pfm_init_percpu, cfg, 0);
26722 +EXPORT_SYMBOL(pfm_pmu_register);
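pfm_pmu_register() is the entry point used by PMU description modules. Going by the fields validated above, a skeletal and entirely hypothetical description would at least supply a name, a probe routine, the counter width and the PMC/PMD tables; the sketch below only shows the shape, with empty register tables:

#include <linux/module.h>
#include <linux/perfmon_kern.h>

static struct pfm_regmap_desc demo_pmc_desc[1];	/* a real PMU fills these in */
static struct pfm_regmap_desc demo_pmd_desc[1];

static int demo_probe_pmu(void)
{
	return 0;	/* return -1 when this PMU is not present */
}

static struct pfm_pmu_config demo_pmu_conf = {
	.pmu_name        = "demo-pmu",
	.counter_width   = 48,
	.pmc_desc        = demo_pmc_desc,
	.num_pmc_entries = 1,
	.pmd_desc        = demo_pmd_desc,
	.num_pmd_entries = 1,
	.probe_pmu       = demo_probe_pmu,
	.version         = "0.1",
	.owner           = THIS_MODULE,
};

static int __init demo_pmu_init(void)
{
	return pfm_pmu_register(&demo_pmu_conf);
}

static void __exit demo_pmu_exit(void)
{
	pfm_pmu_unregister(&demo_pmu_conf);
}

module_init(demo_pmu_init);
module_exit(demo_pmu_exit);
MODULE_LICENSE("GPL");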
26725 + * remove PMU description. Caller must pass address of current
26726 + * configuration. This is mostly for sanity checking as only
26727 + * one config can exist at any time.
26729 + * We are using the module refcount mechanism to protect against
26730 + * removal while the configuration is being used. As long as there is
26731 + * one context, a PMU configuration cannot be removed. The protection is
26732 + * managed in module logic.
26734 +void pfm_pmu_unregister(struct pfm_pmu_config *cfg)
26736 + if (!(cfg || pfm_pmu_conf))
26739 + spin_lock(&pfm_pmu_conf_lock);
26741 + BUG_ON(module_refcount(pfm_pmu_conf->owner));
26743 + if (cfg->owner == pfm_pmu_conf->owner) {
26744 + pfm_sysfs_remove_pmu(pfm_pmu_conf);
26745 + pfm_pmu_conf = NULL;
26748 + spin_unlock(&pfm_pmu_conf_lock);
26750 +EXPORT_SYMBOL(pfm_pmu_unregister);
26752 +static int pfm_pmu_request_module(void)
26757 + mod_name = pfm_arch_get_pmu_module_name();
26758 + if (mod_name == NULL)
26761 + ret = request_module(mod_name);
26763 + PFM_DBG("mod=%s ret=%d\n", mod_name, ret);
26769 + * 0 : do not try to autoload the PMU description module
26770 + * not 0 : try to autoload the PMU description module
26772 +int pfm_pmu_conf_get(int autoload)
26776 + spin_lock(&pfm_pmu_conf_lock);
26778 + if (request_mod_in_progress) {
26783 + if (autoload && pfm_pmu_conf == NULL) {
26785 + request_mod_in_progress = 1;
26787 + spin_unlock(&pfm_pmu_conf_lock);
26789 + pfm_pmu_request_module();
26791 + spin_lock(&pfm_pmu_conf_lock);
26793 + request_mod_in_progress = 0;
26796 + * request_module() may succeed but the module
26797 + * may not have registered properly so we need
26802 + ret = pfm_pmu_conf == NULL ? -ENOSYS : 0;
26803 + if (!ret && pmu_is_module(pfm_pmu_conf)
26804 + && !try_module_get(pfm_pmu_conf->owner))
26808 + spin_unlock(&pfm_pmu_conf_lock);
26813 +void pfm_pmu_conf_put(void)
26815 + if (pfm_pmu_conf == NULL || !pmu_is_module(pfm_pmu_conf))
26818 + spin_lock(&pfm_pmu_conf_lock);
26819 + module_put(pfm_pmu_conf->owner);
26820 + spin_unlock(&pfm_pmu_conf_lock);
26825 + * acquire PMU resource from lower-level PMU register allocator
26826 + * (currently perfctr-watchdog.c)
26828 + * acquisition is done when the first context is created (and not
26829 + * when it is loaded). We grab all that is defined in the description
26830 + * module and then we make adjustments at the arch-specific level.
26832 + * The PMU resource is released when the last perfmon context is
26835 + * interrupts are not masked
26837 +int pfm_pmu_acquire(struct pfm_context *ctx)
26839 + u64 unavail_pmcs[PFM_PMC_BV];
26840 + u64 unavail_pmds[PFM_PMD_BV];
26843 + spin_lock(&pfm_pmu_acq_lock);
26845 + PFM_DBG("pmu_acquired=%u", pfm_pmu_acquired);
26847 + pfm_pmu_acquired++;
26850 + * we need to re-initialize the regdesc on each first
26851 + * (re)acquisition of the PMU, as there may have been changes
26852 + * in the list of available registers, e.g., NMI may have
26853 + * been disabled. Checking on PMU module insert is not
26856 + if (pfm_pmu_acquired == 1) {
26857 + memset(unavail_pmcs, 0, sizeof(unavail_pmcs));
26858 + memset(unavail_pmds, 0, sizeof(unavail_pmds));
26860 + ret = pfm_arch_pmu_acquire(unavail_pmcs, unavail_pmds);
26862 + pfm_pmu_acquired--;
26864 + pfm_pmu_regdesc_init_all(unavail_pmcs, unavail_pmds);
26866 + /* available PMU resources */
26867 + PFM_DBG("PMU acquired: %u PMCs, %u PMDs, %u counters",
26868 + pfm_pmu_conf->regs_all.num_pmcs,
26869 + pfm_pmu_conf->regs_all.num_pmds,
26870 + pfm_pmu_conf->regs_all.num_counters);
26873 + spin_unlock(&pfm_pmu_acq_lock);
26876 + * copy the regdesc that corresponds to the context
26877 + * we copy and not just point because it helps with
26878 + * memory locality. The regdesc structure is accessed
26879 + * very frequently in performance critical code such
26880 + * as context switch and interrupt handling. By using
26881 + * a local copy, we increase the memory footprint, but
26882 + * increase the chance of local memory access,
26883 + * especially for system-wide contexts.
26885 + if (ctx->flags.system)
26886 + ctx->regs = pfm_pmu_conf->regs_sys;
26888 + ctx->regs = pfm_pmu_conf->regs_thr;
26894 + * release the PMU resource
26896 + * actual release happens when last context is destroyed
26898 + * interrupts are not masked
26900 +void pfm_pmu_release(void)
26902 + BUG_ON(irqs_disabled());
26905 + * we need to use a spinlock because release takes some time
26906 + * and we may have a race with pfm_pmu_acquire()
26908 + spin_lock(&pfm_pmu_acq_lock);
26910 + PFM_DBG("pmu_acquired=%d", pfm_pmu_acquired);
26913 + * we decouple test and decrement because if we had errors
26914 + * in pfm_pmu_acquire(), we still come here on pfm_context_free()
26915 + * but with pfm_pmu_acquired == 0
26917 + if (pfm_pmu_acquired > 0 && --pfm_pmu_acquired == 0) {
26918 + pfm_arch_pmu_release();
26919 + PFM_DBG("PMU released");
26921 + spin_unlock(&pfm_pmu_acq_lock);
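pfm_pmu_acquire()/pfm_pmu_release() implement a plain "first user grabs the hardware, last user releases it" count under a spinlock, with a rollback when the low-level acquisition fails. The same pattern, stripped of the perfmon2 specifics (names below are illustrative):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_acq_lock);
static unsigned int demo_acquired;

static int demo_acquire(int (*hw_acquire)(void))
{
	int ret = 0;

	spin_lock(&demo_acq_lock);
	if (++demo_acquired == 1) {
		ret = hw_acquire();		/* first user grabs the hardware */
		if (ret)
			demo_acquired--;	/* roll back on failure */
	}
	spin_unlock(&demo_acq_lock);
	return ret;
}

static void demo_release(void (*hw_release)(void))
{
	spin_lock(&demo_acq_lock);
	if (demo_acquired > 0 && --demo_acquired == 0)
		hw_release();			/* last user frees the hardware */
	spin_unlock(&demo_acq_lock);
}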
26923 diff --git a/perfmon/perfmon_priv.h b/perfmon/perfmon_priv.h
26924 new file mode 100644
26925 index 0000000..5b485de
26927 +++ b/perfmon/perfmon_priv.h
26930 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
26931 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
26933 + * This program is free software; you can redistribute it and/or
26934 + * modify it under the terms of version 2 of the GNU General Public
26935 + * License as published by the Free Software Foundation.
26937 + * This program is distributed in the hope that it will be useful,
26938 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26939 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26940 + * General Public License for more details.
26942 + * You should have received a copy of the GNU General Public License
26943 + * along with this program; if not, write to the Free Software
26944 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26948 +#ifndef __PERFMON_PRIV_H__
26949 +#define __PERFMON_PRIV_H__
26951 + * This file contains all the definitions of data structures, variables, macros
26952 + * that are private to the generic code, i.e., not shared with any code that
26953 + * lives under arch/ or include/asm-XX
26955 + * For shared definitions, use include/linux/perfmon_kern.h
26958 +#ifdef CONFIG_PERFMON
26961 + * type of PMD reset for pfm_reset_pmds() or pfm_switch_sets*()
26963 +#define PFM_PMD_RESET_SHORT 1 /* use short reset value */
26964 +#define PFM_PMD_RESET_LONG 2 /* use long reset value */
26967 + * context lazy save/restore activation count
26969 +#define PFM_INVALID_ACTIVATION ((u64)~0)
26971 +DECLARE_PER_CPU(u64, pmu_activation_number);
26972 +DECLARE_PER_CPU(struct hrtimer, pfm_hrtimer);
26974 +static inline void pfm_set_pmu_owner(struct task_struct *task,
26975 + struct pfm_context *ctx)
26977 + __get_cpu_var(pmu_owner) = task;
26978 + __get_cpu_var(pmu_ctx) = ctx;
26981 +static inline int pfm_msgq_is_empty(struct pfm_context *ctx)
26983 + return ctx->msgq_head == ctx->msgq_tail;
26986 +void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m);
26987 +int pfm_end_notify(struct pfm_context *ctx);
26988 +int pfm_ovfl_notify(struct pfm_context *ctx, struct pfm_event_set *set,
26989 + unsigned long ip);
26991 +int pfm_alloc_fd(struct file **cfile);
26993 +int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count);
26994 +int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req,
26996 +int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req,
27000 +int pfm_init_ctx(void);
27002 +int pfm_pmu_acquire(struct pfm_context *ctx);
27003 +void pfm_pmu_release(void);
27005 +int pfm_session_acquire(int is_system, u32 cpu);
27006 +void pfm_session_release(int is_system, u32 cpu);
27008 +int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size);
27009 +int pfm_smpl_buf_load_context(struct pfm_context *ctx);
27010 +void pfm_smpl_buf_unload_context(struct pfm_context *ctx);
27012 +int pfm_init_sysfs(void);
27014 +#ifdef CONFIG_PERFMON_DEBUG_FS
27015 +int pfm_init_debugfs(void);
27016 +int pfm_debugfs_add_cpu(int mycpu);
27017 +void pfm_debugfs_del_cpu(int mycpu);
27019 +static inline int pfm_init_debugfs(void)
27023 +static inline int pfm_debugfs_add_cpu(int mycpu)
27028 +static inline void pfm_debugfs_del_cpu(int mycpu)
27033 +void pfm_reset_pmds(struct pfm_context *ctx, struct pfm_event_set *set,
27037 +struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set);
27038 +int pfm_init_sets(void);
27040 +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what);
27042 +void pfm_free_sets(struct pfm_context *ctx);
27043 +int pfm_create_initial_set(struct pfm_context *ctx);
27044 +void pfm_switch_sets_from_intr(struct pfm_context *ctx);
27045 +void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set);
27046 +enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t);
27048 +enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx,
27049 + struct pfm_event_set *new_set,
27054 + * pfm_check_save_prev_ctx - check if a previous context exists and save its state
27056 + * called from pfm_load_ctx_thread() and __pfm_ctxswin_thread() to
27057 + * check if a previous context exists. If so, save its PMU state. This is used
27058 + * only on UP kernels.
27060 + * PMU ownership is not cleared because the function is always called while
27061 + * trying to install a new owner.
27063 +static inline void pfm_check_save_prev_ctx(void)
27066 + struct pfm_event_set *set;
27067 + struct pfm_context *ctxp;
27069 + ctxp = __get_cpu_var(pmu_ctx);
27073 + * in UP per-thread, due to lazy save
27074 + * there could be a context from another
27075 + * task. We need to push it first before
27076 + * installing our new state
27078 + set = ctxp->active_set;
27079 + pfm_save_pmds(ctxp, set);
27081 + * do not clear ownership because we rewrite
27088 +int pfm_init_fs(void);
27090 +int pfm_init_hotplug(void);
27092 +void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set);
27093 +void pfm_resume_after_ovfl(struct pfm_context *ctx);
27094 +int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg,
27095 + struct file *filp);
27097 +static inline void pfm_post_work(struct task_struct *task,
27098 + struct pfm_context *ctx, int type)
27100 + ctx->flags.work_type = type;
27101 + set_tsk_thread_flag(task, TIF_PERFMON_WORK);
27102 + pfm_arch_arm_handle_work(task);
27105 +#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
27106 +#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
27108 +#endif /* CONFIG_PERFMON */
27110 +#endif /* __PERFMON_PRIV_H__ */
27111 diff --git a/perfmon/perfmon_res.c b/perfmon/perfmon_res.c
27112 new file mode 100644
27113 index 0000000..7b0382b
27115 +++ b/perfmon/perfmon_res.c
27118 + * perfmon_res.c: perfmon2 resource allocations
27120 + * This file implements the perfmon2 interface which
27121 + * provides access to the hardware performance counters
27122 + * of the host processor.
27124 + * The initial version of perfmon.c was written by
27125 + * Ganesh Venkitachalam, IBM Corp.
27127 + * Then it was modified for perfmon-1.x by Stephane Eranian and
27128 + * David Mosberger, Hewlett Packard Co.
27130 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
27131 + * by Stephane Eranian, Hewlett Packard Co.
27133 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
27134 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
27135 + * David Mosberger-Tang <davidm@hpl.hp.com>
27137 + * More information about perfmon available at:
27138 + * http://perfmon2.sf.net
27140 + * This program is free software; you can redistribute it and/or
27141 + * modify it under the terms of version 2 of the GNU General Public
27142 + * License as published by the Free Software Foundation.
27144 + * This program is distributed in the hope that it will be useful,
27145 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
27146 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27147 + * General Public License for more details.
27149 + * You should have received a copy of the GNU General Public License
27150 + * along with this program; if not, write to the Free Software
27151 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27154 +#include <linux/kernel.h>
27155 +#include <linux/module.h>
27156 +#include <linux/perfmon_kern.h>
27157 +#include "perfmon_priv.h"
27160 + * global information about all sessions
27161 + * mostly used to synchronize between system wide and per-process
27163 +struct pfm_resources {
27164 + size_t smpl_buf_mem_cur;/* current smpl buf mem usage */
27165 + cpumask_t sys_cpumask; /* bitmask of used cpus */
27166 + u32 thread_sessions; /* #num loaded per-thread sessions */
27169 +static struct pfm_resources pfm_res;
27171 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_res_lock);
27174 + * pfm_smpl_buf_space_acquire - check memory resource usage for sampling buffer
27175 + * @ctx: context of interest
27176 + * @size: size of requested buffer
27178 + * sampling buffer allocated by perfmon must be
27179 + * checked against max locked memory usage thresholds
27180 + * for security reasons.
27182 + * The first level check is against the system wide limit
27183 + * as indicated by the system administrator in /sys/kernel/perfmon
27185 + * The second level check is on a per-process basis using
27186 + * RLIMIT_MEMLOCK limit.
27188 + * Operating on the current task only.
27190 +int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size)
27192 + struct mm_struct *mm;
27193 + unsigned long locked;
27194 + unsigned long buf_mem, buf_mem_max;
27195 + unsigned long flags;
27197 + spin_lock_irqsave(&pfm_res_lock, flags);
27200 + * check against global buffer limit
27202 + buf_mem_max = pfm_controls.smpl_buffer_mem_max;
27203 + buf_mem = pfm_res.smpl_buf_mem_cur + size;
27205 + if (buf_mem <= buf_mem_max) {
27206 + pfm_res.smpl_buf_mem_cur = buf_mem;
27208 + PFM_DBG("buf_mem_max=%lu current_buf_mem=%lu",
27213 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27215 + if (buf_mem > buf_mem_max) {
27216 + PFM_DBG("smpl buffer memory threshold reached");
27221 + * check against per-process RLIMIT_MEMLOCK
27223 + mm = get_task_mm(current);
27225 + down_write(&mm->mmap_sem);
27227 + locked = mm->locked_vm << PAGE_SHIFT;
27230 + if (locked > current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) {
27232 + PFM_DBG("RLIMIT_MEMLOCK reached ask_locked=%lu rlim_cur=%lu",
27234 + current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur);
27236 + up_write(&mm->mmap_sem);
27241 + mm->locked_vm = locked >> PAGE_SHIFT;
27243 + up_write(&mm->mmap_sem);
27251 + * remove global buffer memory allocation
27253 + spin_lock_irqsave(&pfm_res_lock, flags);
27255 + pfm_res.smpl_buf_mem_cur -= size;
27257 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27262 + * pfm_smpl_buf_space_release - release resource usage for sampling buffer
27263 + * @ctx: perfmon context of interest
27265 + * There exist multiple paths leading to this function. We need to
27266 + * be very careful with locking on the mmap_sem as it may already be
27267 + * held by the time we come here.
27268 + * The following paths exist:
27282 + * pfm_context_free
27283 + * pfm_release_buf_space
27292 + * pfm_context_free
27293 + * pfm_release_buf_space
27302 + * pfm_context_free
27303 + * pfm_release_buf_space
27305 + * The issue is that on the munmap() path, the mmap_sem is already held
27306 + * in write-mode by the time we come here. To avoid the deadlock, we need
27307 + * to know where we are coming from and skip down_write(). It is fairly
27308 + * difficult to know this because of the lack of good hooks and
27309 + * the fact that there may not have been any mmap() of the sampling buffer
27310 + * (i.e., create_context() followed by close() or exit()).
27312 + * We use a flag, ctx->flags.mmap_nlock, which is toggled in the vm_ops
27313 + * callback invoked from remove_vma(), i.e., systematically for the munmap()
27314 + * call and thus on all but the pure close() path. The exit path does not hold
27315 + * the lock, but by the time we get here on exit there is no task->mm anyway.
27317 + * The mmap_nlock is set only when unmapping and this is the LAST reference
27318 + * to the file (i.e., close() followed by munmap()).
27320 +void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size)
27322 + unsigned long flags;
27323 + struct mm_struct *mm;
27325 + mm = get_task_mm(current);
27327 + if (ctx->flags.mmap_nlock == 0) {
27328 + PFM_DBG("doing down_write");
27329 + down_write(&mm->mmap_sem);
27332 + mm->locked_vm -= size >> PAGE_SHIFT;
27334 + PFM_DBG("size=%zu locked_vm=%lu", size, mm->locked_vm);
27336 + if (ctx->flags.mmap_nlock == 0)
27337 + up_write(&mm->mmap_sem);
27342 + spin_lock_irqsave(&pfm_res_lock, flags);
27344 + pfm_res.smpl_buf_mem_cur -= size;
27346 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27350 + * pfm_session_acquire - reserve a per-thread or per-cpu session
27351 + * @is_system: true if per-cpu session
27352 + * @cpu: cpu number for per-cpu session
27356 + * -EBUSY: if conflicting session exist
27358 +int pfm_session_acquire(int is_system, u32 cpu)
27360 + unsigned long flags;
27365 + * validity checks on cpu_mask have been done upstream
27367 + spin_lock_irqsave(&pfm_res_lock, flags);
27369 + nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
27371 + PFM_DBG("in sys=%u task=%u is_sys=%d cpu=%u",
27373 + pfm_res.thread_sessions,
27379 + * cannot mix system wide and per-task sessions
27381 + if (pfm_res.thread_sessions > 0) {
27382 + PFM_DBG("%u conflicting thread_sessions",
27383 + pfm_res.thread_sessions);
27388 + if (cpu_isset(cpu, pfm_res.sys_cpumask)) {
27389 + PFM_DBG("conflicting session on CPU%u", cpu);
27394 + PFM_DBG("reserved session on CPU%u", cpu);
27396 + cpu_set(cpu, pfm_res.sys_cpumask);
27403 + pfm_res.thread_sessions++;
27406 + PFM_DBG("out sys=%u task=%u is_sys=%d cpu=%u",
27408 + pfm_res.thread_sessions,
27413 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27419 + * pfm_session_release - release a per-cpu or per-thread session
27420 + * @is_system: true if per-cpu session
27421 + * @cpu: cpu number for per-cpu session
27423 + * called from __pfm_unload_context()
27425 +void pfm_session_release(int is_system, u32 cpu)
27427 + unsigned long flags;
27429 + spin_lock_irqsave(&pfm_res_lock, flags);
27431 + PFM_DBG("in sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u",
27432 + cpus_weight(pfm_res.sys_cpumask),
27433 + pfm_res.thread_sessions,
27437 + cpu_clear(cpu, pfm_res.sys_cpumask);
27439 + pfm_res.thread_sessions--;
27441 + PFM_DBG("out sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u",
27442 + cpus_weight(pfm_res.sys_cpumask),
27443 + pfm_res.thread_sessions,
27446 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27450 + * pfm_session_allcpus_acquire - acquire per-cpu sessions on all available cpus
27452 + * currently used by Oprofile on X86
27454 +int pfm_session_allcpus_acquire(void)
27456 + unsigned long flags;
27457 + u32 nsys_cpus, cpu;
27458 + int ret = -EBUSY;
27460 + spin_lock_irqsave(&pfm_res_lock, flags);
27462 + nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
27464 + PFM_DBG("in sys=%u task=%u",
27466 + pfm_res.thread_sessions);
27469 + PFM_DBG("already some system-wide sessions");
27474 + * cannot mix system wide and per-task sessions
27476 + if (pfm_res.thread_sessions) {
27477 + PFM_DBG("%u conflicting thread_sessions",
27478 + pfm_res.thread_sessions);
27482 + for_each_online_cpu(cpu) {
27483 + cpu_set(cpu, pfm_res.sys_cpumask);
27487 + PFM_DBG("out sys=%u task=%u",
27489 + pfm_res.thread_sessions);
27493 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27497 +EXPORT_SYMBOL(pfm_session_allcpus_acquire);
27500 + * pfm_session_allcpus_release - release per-cpu sessions on all cpus
27502 + * currently used by Oprofile code
27504 +void pfm_session_allcpus_release(void)
27506 + unsigned long flags;
27507 + u32 nsys_cpus, cpu;
27509 + spin_lock_irqsave(&pfm_res_lock, flags);
27511 + nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
27513 + PFM_DBG("in sys=%u task=%u",
27515 + pfm_res.thread_sessions);
27518 + * XXX: could use __cpus_clear() with nbits
27520 + for_each_online_cpu(cpu) {
27521 + cpu_clear(cpu, pfm_res.sys_cpumask);
27525 + PFM_DBG("out sys=%u task=%u",
27527 + pfm_res.thread_sessions);
27529 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27531 +EXPORT_SYMBOL(pfm_session_allcpus_release);
27534 + * pfm_sysfs_res_show - return current resource usage for sysfs
27535 + * @buf: buffer to hold string in return
27536 + * @sz: size of buf
27537 + * @what: what to produce
27538 + * what=0 : thread_sessions
27539 + * what=1 : cpus_weight(sys_cpumask)
27540 + * what=2 : smpl_buf_mem_cur
27541 + * what=3 : pmu model name
27543 + * called from perfmon_sysfs.c
27544 + * return number of bytes written into buf (up to sz)
27546 +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what)
27548 + unsigned long flags;
27550 + spin_lock_irqsave(&pfm_res_lock, flags);
27553 + case 0: snprintf(buf, sz, "%u\n", pfm_res.thread_sessions);
27555 + case 1: snprintf(buf, sz, "%d\n", cpus_weight(pfm_res.sys_cpumask));
27557 + case 2: snprintf(buf, sz, "%zu\n", pfm_res.smpl_buf_mem_cur);
27560 + snprintf(buf, sz, "%s\n",
27561 + pfm_pmu_conf ? pfm_pmu_conf->pmu_name
27564 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27565 + return strlen(buf);
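
As an aside, the session accounting in pfm_session_acquire() above boils down to a couple of exclusion checks. Below is a minimal user-space model of the system-wide case only: refuse when per-thread sessions are loaded, or when the target CPU is already reserved. The names, the fixed CPU count and the omission of locking are simplifications for illustration.

#include <stdio.h>
#include <errno.h>

#define NCPUS 4

static unsigned int thread_sessions;		/* loaded per-thread sessions */
static unsigned char cpu_reserved[NCPUS];	/* stands in for sys_cpumask  */

static int syswide_acquire(unsigned int cpu)
{
	if (thread_sessions)		/* cannot mix with per-thread sessions */
		return -EBUSY;
	if (cpu_reserved[cpu])		/* conflicting session on that CPU     */
		return -EBUSY;
	cpu_reserved[cpu] = 1;
	return 0;
}

int main(void)
{
	printf("%d\n", syswide_acquire(1));	/* 0: CPU1 reserved            */
	printf("%d\n", syswide_acquire(1));	/* -EBUSY: CPU1 already in use */
	thread_sessions = 1;
	printf("%d\n", syswide_acquire(2));	/* -EBUSY: per-thread conflict */
	return 0;
}
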
27567 diff --git a/perfmon/perfmon_rw.c b/perfmon/perfmon_rw.c
27568 new file mode 100644
27569 index 0000000..3168eb7
27571 +++ b/perfmon/perfmon_rw.c
27574 + * perfmon.c: perfmon2 PMC/PMD read/write system calls
27576 + * This file implements the perfmon2 interface which
27577 + * provides access to the hardware performance counters
27578 + * of the host processor.
27580 + * The initial version of perfmon.c was written by
27581 + * Ganesh Venkitachalam, IBM Corp.
27583 + * Then it was modified for perfmon-1.x by Stephane Eranian and
27584 + * David Mosberger, Hewlett Packard Co.
27586 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
27587 + * by Stephane Eranian, Hewlett Packard Co.
27589 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
27590 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
27591 + * David Mosberger-Tang <davidm@hpl.hp.com>
27593 + * More information about perfmon available at:
27594 + * http://perfmon2.sf.net/
27596 + * This program is free software; you can redistribute it and/or
27597 + * modify it under the terms of version 2 of the GNU General Public
27598 + * License as published by the Free Software Foundation.
27600 + * This program is distributed in the hope that it will be useful,
27601 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
27602 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27603 + * General Public License for more details.
27605 + * You should have received a copy of the GNU General Public License
27606 + * along with this program; if not, write to the Free Software
27607 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27610 +#include <linux/module.h>
27611 +#include <linux/kernel.h>
27612 +#include <linux/perfmon_kern.h>
27613 +#include "perfmon_priv.h"
27615 +#define PFM_REGFL_PMC_ALL (PFM_REGFL_NO_EMUL64)
27616 +#define PFM_REGFL_PMD_ALL (PFM_REGFL_RANDOM|PFM_REGFL_OVFL_NOTIFY)
27619 + * update_used_reg -- update used_pmcs for a single PMD
27620 + * @set: set to update
27621 + * @cnum: new PMD to add
27623 + * This function adds to used_pmcs the PMCs on which PMD cnum depends
27625 +static inline void update_used_reg(struct pfm_context *ctx,
27626 + struct pfm_event_set *set, u16 cnum)
27628 + bitmap_or(cast_ulp(set->used_pmcs),
27629 + cast_ulp(set->used_pmcs),
27630 + cast_ulp(pfm_pmu_conf->pmd_desc[cnum].dep_pmcs),
27631 + ctx->regs.max_pmc);
27635 + * update_used_pmcs -- update used_pmcs bitmask
27636 + * @set: event set to update
27637 + * @bv: bitmask to inspect for new PMD registers
27639 + * This function updates the used_pmcs bitmask for
27640 + * the set using bv, a bitmask of pmds. For each pmd in bv,
27641 + * its depending pmcs are added to used_pmcs.
27643 +static void update_used_pmcs(struct pfm_context *ctx,
27644 + struct pfm_event_set *set, unsigned long *bv)
27649 + max_pmd = ctx->regs.max_pmd;
27651 + n = bitmap_weight(bv, max_pmd);
27652 + for(p = 0; n; n--, p = q+1) {
27653 + q = find_next_bit(bv, max_pmd, p);
27654 + update_used_reg(ctx, set, q);
27659 + * update_changes -- update nused_pmcs, nused_pmds, write newly touched pmcs
27660 + * @ctx: context to use
27661 + * @set: event set to use
27662 + * @old_used_pmcs: former used_pmc bitmask
27663 + * @can_access: non-zero if PMU is accessible, i.e., can be written to
27665 + * This function updates nused_pmcs and nused_pmds after the last modification
27666 + * to an event set. When new pmcs are used, then they must be initialized such
27667 + * that we do not pick up stale values from another session.
27669 +static inline int update_changes(struct pfm_context *ctx, struct pfm_event_set *set,
27670 + unsigned long *old_used_pmcs)
27672 + struct pfarg_pmc req;
27673 + u16 max_pmc, max_pmd;
27674 + int n, p, q, ret = 0;
27676 + max_pmd = ctx->regs.max_pmd;
27677 + max_pmc = ctx->regs.max_pmc;
27680 + * update used counts
27682 + set->nused_pmds = bitmap_weight(cast_ulp(set->used_pmds), max_pmd);
27683 + set->nused_pmcs = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc);
27685 + PFM_DBG("set%u u_pmds=0x%llx nu_pmds=%u u_pmcs=0x%llx nu_pmcs=%u",
27687 + (unsigned long long)set->used_pmds[0],
27689 + (unsigned long long)set->used_pmcs[0],
27690 + set->nused_pmcs);
27692 + memset(&req, 0, sizeof(req));
27694 + n = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc);
27695 + for(p = 0; n; n--, p = q+1) {
27696 + q = find_next_bit(cast_ulp(set->used_pmcs), max_pmc, p);
27698 + if (test_bit(q, cast_ulp(old_used_pmcs)))
27702 + req.reg_value = set->pmcs[q];
27704 + ret = __pfm_write_pmcs(ctx, &req, 1);
27712 + * handle_smpl_bv - checks sampling bitmasks for new PMDs
27713 + * @ctx: context to use
27714 + * @set: set to use
27715 + * @bv: sampling bitmask
27717 + * scans the smpl bitmask looking for new PMDs (not yet used), if found
27718 + * invoke pfm_write_pmds() on them to get them initialized and marked used
27720 +static int handle_smpl_bv(struct pfm_context *ctx, struct pfm_event_set *set,
27721 + unsigned long *bv)
27723 + struct pfarg_pmd req;
27724 + int p, q, n, ret = 0;
27727 + memset(&req, 0, sizeof(req));
27729 + max_pmd = ctx->regs.max_pmd;
27731 + n = bitmap_weight(cast_ulp(bv), max_pmd);
27733 + for(p = 0; n; n--, p = q+1) {
27734 + q = find_next_bit(cast_ulp(bv), max_pmd, p);
27736 + if (test_bit(q, cast_ulp(set->used_pmds)))
27740 + req.reg_value = 0;
27742 + ret = __pfm_write_pmds(ctx, &req, 1, 0);
27750 + * is_invalid -- check if register index is within limits
27751 + * @cnum: register index
27752 + * @impl: bitmask of implemented registers
27753 + * @max: highest implemented registers + 1
27756 + * 0 if the register index is valid
27759 +static inline int is_invalid(u16 cnum, unsigned long *impl, u16 max)
27761 + return cnum >= max || !test_bit(cnum, impl);
27765 + * __pfm_write_pmds - modify data registers
27766 + * @ctx: context to operate on
27767 + * @req: pfarg_pmd_t request from user
27768 + * @count: number of element in the pfarg_pmd_t vector
27769 + * @compat: used only on IA-64 to maintain backward compatibility with v2.0
27771 + * The function succeeds whether the context is attached or not.
27772 + * When attached to another thread, that thread must be stopped.
27774 + * The context is locked and interrupts are disabled.
27776 +int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
27779 + struct pfm_event_set *set, *active_set;
27780 + u64 old_used_pmcs[PFM_PMC_BV];
27781 + unsigned long *smpl_pmds, *reset_pmds, *impl_pmds, *impl_rw_pmds;
27782 + u32 req_flags, flags;
27783 + u16 cnum, pmd_type, max_pmd;
27785 + int i, can_access_pmu;
27787 + pfm_pmd_check_t wr_func;
27789 + active_set = ctx->active_set;
27790 + max_pmd = ctx->regs.max_pmd;
27791 + impl_pmds = cast_ulp(ctx->regs.pmds);
27792 + impl_rw_pmds = cast_ulp(ctx->regs.rw_pmds);
27793 + wr_func = pfm_pmu_conf->pmd_write_check;
27794 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
27796 + can_access_pmu = 0;
27799 + * we cannot access the actual PMD registers when monitoring is masked
27801 + if (unlikely(ctx->state == PFM_CTX_LOADED))
27802 + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
27803 + || ctx->flags.system;
27805 + bitmap_copy(cast_ulp(old_used_pmcs),
27806 + cast_ulp(set->used_pmcs),
27807 + ctx->regs.max_pmc);
27810 + for (i = 0; i < count; i++, req++) {
27812 + cnum = req->reg_num;
27813 + set_id = req->reg_set;
27814 + req_flags = req->reg_flags;
27815 + smpl_pmds = cast_ulp(req->reg_smpl_pmds);
27816 + reset_pmds = cast_ulp(req->reg_reset_pmds);
27820 + * cannot write to a non-existing register;
27821 + * writes to read-only registers are ignored
27823 + if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) {
27824 + PFM_DBG("pmd%u is not available", cnum);
27828 + pmd_type = pfm_pmu_conf->pmd_desc[cnum].type;
27831 + * ensure only valid flags are set
27833 + if (req_flags & ~(PFM_REGFL_PMD_ALL)) {
27834 + PFM_DBG("pmd%u: invalid flags=0x%x",
27835 + cnum, req_flags);
27840 + * OVFL_NOTIFY is valid for all types of PMD.
27841 + * A non-counting PMD may trigger a PMU interrupt
27842 + * and thus may trigger the recording of a sample.
27843 + * This is true with IBS on AMD family 16.
27845 + if (req_flags & PFM_REGFL_OVFL_NOTIFY)
27846 + flags |= PFM_REGFL_OVFL_NOTIFY;
27849 + * We allow randomization for non-counting PMDs
27851 + if (req_flags & PFM_REGFL_RANDOM)
27852 + flags |= PFM_REGFL_RANDOM;
27855 + * verify validity of smpl_pmds
27857 + if (unlikely(!bitmap_subset(smpl_pmds, impl_pmds, PFM_MAX_PMDS))) {
27858 + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u",
27859 + (unsigned long long)req->reg_smpl_pmds[0],
27865 + * verify validity of reset_pmds
27866 + * check against impl_rw_pmds because it is not
27867 + * possible to reset read-only PMDs
27869 + if (unlikely(!bitmap_subset(reset_pmds, impl_rw_pmds, PFM_MAX_PMDS))) {
27870 + PFM_DBG("invalid reset_pmds=0x%llx for pmd%u",
27871 + (unsigned long long)req->reg_reset_pmds[0],
27877 + * locate event set
27879 + if (set_id != set->id) {
27880 + /* update number of used register for previous set */
27882 + ret = update_changes(ctx, set, cast_ulp(old_used_pmcs));
27887 + set = pfm_find_set(ctx, set_id, 0);
27888 + if (set == NULL) {
27889 + PFM_DBG("event set%u does not exist",
27893 + bitmap_copy(cast_ulp(old_used_pmcs),
27894 + cast_ulp(set->used_pmcs),
27895 + ctx->regs.max_pmc);
27899 + * execute write checker, if any
27901 + if (unlikely(wr_func && (pmd_type & PFM_REG_WC))) {
27902 + ret = (*wr_func)(ctx, set, req);
27910 + * now commit changes to software state
27913 + if (unlikely(compat))
27916 + if (bitmap_weight(smpl_pmds, max_pmd)) {
27917 + ret = handle_smpl_bv(ctx, set, smpl_pmds);
27920 + update_used_pmcs(ctx, set, cast_ulp(smpl_pmds));
27923 + bitmap_copy(cast_ulp(set->pmds[cnum].smpl_pmds),
27928 + if (bitmap_weight(reset_pmds, max_pmd)) {
27929 + ret = handle_smpl_bv(ctx, set, reset_pmds);
27932 + update_used_pmcs(ctx, set, cast_ulp(reset_pmds));
27935 + bitmap_copy(cast_ulp(set->pmds[cnum].reset_pmds),
27939 + set->pmds[cnum].flags = flags;
27941 + __set_bit(cnum, cast_ulp(set->used_pmds));
27942 + update_used_reg(ctx, set, cnum);
27945 + * we reprogram the PMD, hence we clear any pending
27946 + * ovfl. This does affect ovfl switching on restart, but the new
27947 + * value has already been established here
27949 + if (test_bit(cnum, cast_ulp(set->povfl_pmds))) {
27950 + set->npend_ovfls--;
27951 + __clear_bit(cnum, cast_ulp(set->povfl_pmds));
27953 + __clear_bit(cnum, cast_ulp(set->ovfl_pmds));
27956 + * update ovfl_notify
27958 + if (flags & PFM_REGFL_OVFL_NOTIFY)
27959 + __set_bit(cnum, cast_ulp(set->ovfl_notify));
27961 + __clear_bit(cnum, cast_ulp(set->ovfl_notify));
27964 + * establish new switch count
27966 + set->pmds[cnum].ovflsw_thres = req->reg_ovfl_switch_cnt;
27967 + set->pmds[cnum].ovflsw_ref_thres = req->reg_ovfl_switch_cnt;
27971 + * set last value to new value for all types of PMD
27973 + set->pmds[cnum].lval = req->reg_value;
27974 + set->pmds[cnum].value = req->reg_value;
27977 + * update reset values (not just for counters)
27979 + set->pmds[cnum].long_reset = req->reg_long_reset;
27980 + set->pmds[cnum].short_reset = req->reg_short_reset;
27983 + * update randomization mask
27985 + set->pmds[cnum].mask = req->reg_random_mask;
27987 + set->pmds[cnum].eventid = req->reg_smpl_eventid;
27989 + if (set == active_set) {
27990 + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMDS;
27991 + if (can_access_pmu)
27992 + pfm_write_pmd(ctx, cnum, req->reg_value);
27996 + PFM_DBG("set%u pmd%u=0x%llx flags=0x%x a_pmu=%d "
27997 + "ctx_pmd=0x%llx s_reset=0x%llx "
27998 + "l_reset=0x%llx s_pmds=0x%llx "
27999 + "r_pmds=0x%llx o_pmds=0x%llx "
28000 + "o_thres=%llu compat=%d eventid=%llx",
28003 + (unsigned long long)req->reg_value,
28004 + set->pmds[cnum].flags,
28006 + (unsigned long long)set->pmds[cnum].value,
28007 + (unsigned long long)set->pmds[cnum].short_reset,
28008 + (unsigned long long)set->pmds[cnum].long_reset,
28009 + (unsigned long long)set->pmds[cnum].smpl_pmds[0],
28010 + (unsigned long long)set->pmds[cnum].reset_pmds[0],
28011 + (unsigned long long)set->ovfl_pmds[0],
28012 + (unsigned long long)set->pmds[cnum].ovflsw_thres,
28014 + (unsigned long long)set->pmds[cnum].eventid);
28019 + update_changes(ctx, set, cast_ulp(old_used_pmcs));
28022 + * make changes visible
28024 + if (can_access_pmu)
28025 + pfm_arch_serialize();
28031 + * __pfm_write_pmcs - modify config registers
28032 + * @ctx: context to operate on
28033 + * @req: pfarg_pmc_t request from user
28034 + * @count: number of element in the pfarg_pmc_t vector
28037 + * The function succeeds whether the context is attached or not.
28038 + * When attached to another thread, that thread must be stopped.
28040 + * The context is locked and interrupts are disabled.
28042 +int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, int count)
28044 + struct pfm_event_set *set, *active_set;
28045 + u64 value, dfl_val, rsvd_msk;
28046 + unsigned long *impl_pmcs;
28047 + int i, can_access_pmu;
28050 + u16 cnum, pmc_type, max_pmc;
28051 + u32 flags, expert;
28052 + pfm_pmc_check_t wr_func;
28054 + active_set = ctx->active_set;
28056 + wr_func = pfm_pmu_conf->pmc_write_check;
28057 + max_pmc = ctx->regs.max_pmc;
28058 + impl_pmcs = cast_ulp(ctx->regs.pmcs);
28059 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28061 + expert = pfm_controls.flags & PFM_CTRL_FL_RW_EXPERT;
28063 + can_access_pmu = 0;
28066 + * we cannot access the actual PMC registers when monitoring is masked
28068 + if (unlikely(ctx->state == PFM_CTX_LOADED))
28069 + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
28070 + || ctx->flags.system;
28074 + for (i = 0; i < count; i++, req++) {
28076 + cnum = req->reg_num;
28077 + set_id = req->reg_set;
28078 + value = req->reg_value;
28079 + flags = req->reg_flags;
28082 + * no access to unavailable PMC register
28084 + if (unlikely(is_invalid(cnum, impl_pmcs, max_pmc))) {
28085 + PFM_DBG("pmc%u is not available", cnum);
28089 + pmc_type = pfm_pmu_conf->pmc_desc[cnum].type;
28090 + dfl_val = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
28091 + rsvd_msk = pfm_pmu_conf->pmc_desc[cnum].rsvd_msk;
28094 + * ensure only valid flags are set
28096 + if (flags & ~PFM_REGFL_PMC_ALL) {
28097 + PFM_DBG("pmc%u: invalid flags=0x%x", cnum, flags);
28102 + * locate event set
28104 + if (set_id != set->id) {
28105 + set = pfm_find_set(ctx, set_id, 0);
28106 + if (set == NULL) {
28107 + PFM_DBG("event set%u does not exist",
28114 + * set reserved bits to default values
28115 + * (reserved bits must be 1 in rsvd_msk)
28117 + * bypass via /sys/kernel/perfmon/mode = 1
28119 + if (likely(!expert))
28120 + value = (value & ~rsvd_msk) | (dfl_val & rsvd_msk);
28122 + if (flags & PFM_REGFL_NO_EMUL64) {
28123 + if (!(pmc_type & PFM_REG_NO64)) {
28124 + PFM_DBG("pmc%u no support for "
28125 + "PFM_REGFL_NO_EMUL64", cnum);
28128 + value &= ~pfm_pmu_conf->pmc_desc[cnum].no_emul64_msk;
28132 + * execute write checker, if any
28134 + if (likely(wr_func && (pmc_type & PFM_REG_WC))) {
28135 + req->reg_value = value;
28136 + ret = (*wr_func)(ctx, set, req);
28139 + value = req->reg_value;
28143 + * Now we commit the changes
28147 + * mark PMC register as used
28148 + * We do not track the associated PMC registers because
28149 + * they will likely need to be written anyway
28150 + * in order to become useful, at which point the statement
28151 + * below will catch that.
28153 + * The used_pmcs bitmask is only useful on architectures where
28154 + * the PMC needs to be modified for particular bits, especially
28155 + * on overflow or to stop/start.
28157 + if (!test_bit(cnum, cast_ulp(set->used_pmcs))) {
28158 + __set_bit(cnum, cast_ulp(set->used_pmcs));
28159 + set->nused_pmcs++;
28162 + set->pmcs[cnum] = value;
28164 + if (set == active_set) {
28165 + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
28166 + if (can_access_pmu)
28167 + pfm_arch_write_pmc(ctx, cnum, value);
28170 + PFM_DBG("set%u pmc%u=0x%llx a_pmu=%d "
28171 + "u_pmcs=0x%llx nu_pmcs=%u",
28174 + (unsigned long long)value,
28176 + (unsigned long long)set->used_pmcs[0],
28177 + set->nused_pmcs);
28182 + * make sure the changes are visible
28184 + if (can_access_pmu)
28185 + pfm_arch_serialize();
28191 + * __pfm_read_pmds - read data registers
28192 + * @ctx: context to operate on
28193 + * @req: pfarg_pmd_t request from user
28194 + * @count: number of element in the pfarg_pmd_t vector
28197 + * The function succeeds whether the context is attached or not.
28198 + * When attached to another thread, that thread must be stopped.
28200 + * The context is locked and interrupts are disabled.
28202 +int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count)
28204 + u64 val = 0, lval, ovfl_mask, hw_val;
28206 + unsigned long *impl_pmds;
28207 + struct pfm_event_set *set, *active_set;
28208 + int i, ret, can_access_pmu = 0;
28209 + u16 cnum, pmd_type, set_id, max_pmd;
28211 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
28212 + impl_pmds = cast_ulp(ctx->regs.pmds);
28213 + max_pmd = ctx->regs.max_pmd;
28214 + active_set = ctx->active_set;
28215 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28217 + if (likely(ctx->state == PFM_CTX_LOADED)) {
28218 + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
28219 + || ctx->flags.system;
28221 + if (can_access_pmu)
28222 + pfm_arch_serialize();
28226 + * on both UP and SMP, we can only read the PMD from the hardware
28227 + * register when the task is the owner of the local PMU.
28230 + for (i = 0; i < count; i++, req++) {
28232 + cnum = req->reg_num;
28233 + set_id = req->reg_set;
28235 + if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) {
28236 + PFM_DBG("pmd%u is not implemented or inaccessible", cnum);
28240 + pmd_type = pfm_pmu_conf->pmd_desc[cnum].type;
28243 + * locate event set
28245 + if (set_id != set->id) {
28246 + set = pfm_find_set(ctx, set_id, 0);
28247 + if (set == NULL) {
28248 + PFM_DBG("event set%u does not exist",
28254 + * it is not possible to read a PMD which was not requested:
28255 + * - explicitly written via pfm_write_pmds()
28256 + * - provided as a reg_smpl_pmds[] to another PMD during
28257 + * pfm_write_pmds()
28259 + * This is motivated by security and for optimization purposes:
28260 + * - on context switch restore, we can restore only what
28261 + * we use (except when regs directly readable at user
28262 + * level, e.g., IA-64 self-monitoring, I386 RDPMC).
28263 + * - do not need to maintain PMC -> PMD dependencies
28265 + if (unlikely(!test_bit(cnum, cast_ulp(set->used_pmds)))) {
28266 + PFM_DBG("pmd%u cannot read, because not used", cnum);
28270 + val = set->pmds[cnum].value;
28271 + lval = set->pmds[cnum].lval;
28274 + * extract remaining ovfl to switch
28276 + sw_cnt = set->pmds[cnum].ovflsw_thres;
28279 + * If the task is not the current one, then we check if the
28280 + * PMU state is still in the local live register due to lazy
28281 + * ctxsw. If true, then we read directly from the registers.
28283 + if (set == active_set && can_access_pmu) {
28284 + hw_val = pfm_read_pmd(ctx, cnum);
28285 + if (pmd_type & PFM_REG_C64)
28286 + val = (val & ~ovfl_mask) | (hw_val & ovfl_mask);
28291 + PFM_DBG("set%u pmd%u=0x%llx sw_thr=%llu lval=0x%llx",
28294 + (unsigned long long)val,
28295 + (unsigned long long)sw_cnt,
28296 + (unsigned long long)lval);
28298 + req->reg_value = val;
28299 + req->reg_last_reset_val = lval;
28300 + req->reg_ovfl_switch_cnt = sw_cnt;
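
The merge of hardware and software bits performed for PFM_REG_C64 counters above is worth a small worked example. The sketch below uses made-up values and a hypothetical 47-bit ovfl_mask: the live hardware register only supplies the low bits, while the software-maintained value carries the emulated upper bits.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ovfl_mask = (1ULL << 47) - 1;		/* assume a 47-bit hardware counter */
	uint64_t sw_val    = 0x0000800000001000ULL;	/* software-maintained 64-bit value */
	uint64_t hw_val    = 0x0000000000002345ULL;	/* current hardware (low) bits      */

	/* keep the software upper bits, take the live lower bits from hardware */
	uint64_t val = (sw_val & ~ovfl_mask) | (hw_val & ovfl_mask);

	printf("0x%016llx\n", (unsigned long long)val);	/* prints 0x0000800000002345 */
	return 0;
}
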
28306 diff --git a/perfmon/perfmon_sets.c b/perfmon/perfmon_sets.c
28307 new file mode 100644
28308 index 0000000..24534cb
28310 +++ b/perfmon/perfmon_sets.c
28313 + * perfmon_sets.c: perfmon2 event sets and multiplexing functions
28315 + * This file implements the perfmon2 interface which
28316 + * provides access to the hardware performance counters
28317 + * of the host processor.
28319 + * The initial version of perfmon.c was written by
28320 + * Ganesh Venkitachalam, IBM Corp.
28322 + * Then it was modified for perfmon-1.x by Stephane Eranian and
28323 + * David Mosberger, Hewlett Packard Co.
28325 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
28326 + * by Stephane Eranian, Hewlett Packard Co.
28328 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
28329 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
28330 + * David Mosberger-Tang <davidm@hpl.hp.com>
28332 + * More information about perfmon available at:
28333 + * http://perfmon2.sf.net
28335 + * This program is free software; you can redistribute it and/or
28336 + * modify it under the terms of version 2 of the GNU General Public
28337 + * License as published by the Free Software Foundation.
28339 + * This program is distributed in the hope that it will be useful,
28340 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
28341 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28342 + * General Public License for more details.
28344 + * You should have received a copy of the GNU General Public License
28345 + * along with this program; if not, write to the Free Software
28346 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28349 +#include <linux/kernel.h>
28350 +#include <linux/perfmon_kern.h>
28351 +#include "perfmon_priv.h"
28353 +static struct kmem_cache *pfm_set_cachep;
28356 + * pfm_reload_switch_thresholds - reload overflow-based switch thresholds per set
28357 + * @set: the set for which to reload thresholds
28360 +static void pfm_reload_switch_thresholds(struct pfm_context *ctx,
28361 + struct pfm_event_set *set)
28364 + u16 i, max, first;
28366 + used_pmds = set->used_pmds;
28367 + first = ctx->regs.first_intr_pmd;
28368 + max = ctx->regs.max_intr_pmd;
28370 + for (i = first; i < max; i++) {
28371 + if (test_bit(i, cast_ulp(used_pmds))) {
28372 + set->pmds[i].ovflsw_thres = set->pmds[i].ovflsw_ref_thres;
28374 + PFM_DBG("set%u pmd%u ovflsw_thres=%llu",
28377 + (unsigned long long)set->pmds[i].ovflsw_thres);
28383 + * pfm_prepare_sets - initialize sets on pfm_load_context
28384 + * @ctx : context to operate on
28385 + * @load_set: set to activate first
28387 + * connect all sets, reset internal fields
28389 +struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set)
28391 + struct pfm_event_set *set, *p;
28395 + * locate first set to activate
28397 + set = pfm_find_set(ctx, load_set, 0);
28401 + if (set->flags & PFM_SETFL_OVFL_SWITCH)
28402 + pfm_reload_switch_thresholds(ctx, set);
28404 + max = ctx->regs.max_intr_pmd;
28406 + list_for_each_entry(p, &ctx->set_list, list) {
28408 + * cleanup bitvectors
28410 + bitmap_zero(cast_ulp(p->ovfl_pmds), max);
28411 + bitmap_zero(cast_ulp(p->povfl_pmds), max);
28413 + p->npend_ovfls = 0;
28416 + * we cannot just use plain clear because of arch-specific flags
28418 + p->priv_flags &= ~(PFM_SETFL_PRIV_MOD_BOTH|PFM_SETFL_PRIV_SWITCH);
28420 + * neither duration nor runs are reset because typically loading/unloading
28421 + * does not mean counts are reset. To reset, the set must be modified
28428 + * called by hrtimer_interrupt()
28430 + * This is the only function where we come with
28431 + * cpu_base->lock held before ctx->lock
28433 + * interrupts are disabled
28435 +enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t)
28437 + struct pfm_event_set *set;
28438 + struct pfm_context *ctx;
28439 + unsigned long flags;
28440 + enum hrtimer_restart ret = HRTIMER_NORESTART;
28443 + * prevent against race with unload
28445 + ctx = __get_cpu_var(pmu_ctx);
28447 + return HRTIMER_NORESTART;
28449 + spin_lock_irqsave(&ctx->lock, flags);
28451 + set = ctx->active_set;
28454 + * switching occurs only when context is attached
28456 + if (ctx->state != PFM_CTX_LOADED)
28459 + * timer does not run while monitoring is inactive (not started)
28461 + if (!pfm_arch_is_active(ctx))
28464 + pfm_stats_inc(handle_timeout_count);
28466 + ret = pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_SHORT, 0);
28468 + spin_unlock_irqrestore(&ctx->lock, flags);
28474 + * always operating on the current task
28475 + * interrupts are masked
28478 + * - new_set: new set to switch to, if NULL follow normal chain
28480 +enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx,
28481 + struct pfm_event_set *new_set,
28485 + struct pfm_event_set *set;
28488 + int is_system, is_active, nn;
28489 + enum hrtimer_restart ret = HRTIMER_NORESTART;
28491 + now = sched_clock();
28492 + set = ctx->active_set;
28493 + is_active = pfm_arch_is_active(ctx);
28496 + * if no set is explicitly requested,
28497 + * use the set_switch_next field
28501 + * we use round-robin unless the user specified
28502 + * a particular set to go to.
28504 + new_set = list_first_entry(&set->list, struct pfm_event_set, list);
28505 + if (&new_set->list == &ctx->set_list)
28506 + new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28509 + PFM_DBG_ovfl("state=%d act=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u "
28510 + "next_runs=%llu new_npend=%d reset_mode=%d reset_pmds=%llx",
28514 + (unsigned long long)set->runs,
28515 + set->npend_ovfls,
28517 + (unsigned long long)new_set->runs,
28518 + new_set->npend_ovfls,
28520 + (unsigned long long)new_set->reset_pmds[0]);
28522 + is_system = ctx->flags.system;
28523 + new_flags = new_set->flags;
28526 + * nothing more to do
28528 + if (new_set == set)
28529 + goto skip_same_set;
28532 + pfm_arch_stop(current, ctx);
28533 + pfm_save_pmds(ctx, set);
28535 + * compute elapsed ns for active set
28537 + set->duration += now - set->duration_start;
28540 + pfm_arch_restore_pmds(ctx, new_set);
28542 + * if masked, we must restore the pmcs such that they
28543 + * do not capture anything.
28545 + pfm_arch_restore_pmcs(ctx, new_set);
28547 + if (new_set->npend_ovfls) {
28548 + pfm_arch_resend_irq(ctx);
28549 + pfm_stats_inc(ovfl_intr_replay_count);
28552 + new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
28557 + * reset switch threshold
28559 + if (new_flags & PFM_SETFL_OVFL_SWITCH)
28560 + pfm_reload_switch_thresholds(ctx, new_set);
28563 + * reset overflowed PMD registers in new set
28565 + nn = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd);
28567 + pfm_reset_pmds(ctx, new_set, nn, reset_mode);
28571 + * This is needed when coming from pfm_start()
28573 + * When switching to the same set, there is no
28574 + * need to restart
28577 + goto skip_restart;
28581 + * do not need to restart when same set
28583 + if (new_set != set) {
28584 + ctx->active_set = new_set;
28585 + new_set->duration_start = now;
28586 + pfm_arch_start(current, ctx);
28589 + * install new timeout if necessary
28591 + if (new_flags & PFM_SETFL_TIME_SWITCH) {
28592 + struct hrtimer *h;
28593 + h = &__get_cpu_var(pfm_hrtimer);
28594 + hrtimer_forward(h, h->base->get_time(), new_set->hrtimer_exp);
28595 + new_set->hrtimer_rem = new_set->hrtimer_exp;
28596 + ret = HRTIMER_RESTART;
28601 + ctx->active_set = new_set;
28603 + end = sched_clock();
28605 + pfm_stats_inc(set_switch_count);
28606 + pfm_stats_add(set_switch_ns, end - now);
28612 + * called from __pfm_overflow_handler() to switch event sets.
28613 + * monitoring is stopped, task is current, interrupts are masked.
28614 + * compared to pfm_switch_sets(), this version is simplified because
28615 + * it knows about the call path. There is no need to stop monitoring
28616 + * because it is already frozen by PMU handler.
28618 +void pfm_switch_sets_from_intr(struct pfm_context *ctx)
28620 + struct pfm_event_set *set, *new_set;
28623 + int is_system, n;
28625 + now = sched_clock();
28626 + set = ctx->active_set;
28627 + new_set = list_first_entry(&set->list, struct pfm_event_set, list);
28628 + if (&new_set->list == &ctx->set_list)
28629 + new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28631 + PFM_DBG_ovfl("state=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u "
28632 + "next_runs=%llu new_npend=%d new_r_pmds=%llx",
28635 + (unsigned long long)set->runs,
28636 + set->npend_ovfls,
28638 + (unsigned long long)new_set->runs,
28639 + new_set->npend_ovfls,
28640 + (unsigned long long)new_set->reset_pmds[0]);
28642 + is_system = ctx->flags.system;
28643 + new_flags = new_set->flags;
28646 + * nothing more to do
28648 + if (new_set == set)
28649 + goto skip_same_set;
28652 + * switch on intr only when set has OVFL_SWITCH
28654 + BUG_ON(set->flags & PFM_SETFL_TIME_SWITCH);
28657 + * when called from PMU intr handler, monitoring
28658 + * is already stopped
28660 + * save current PMD registers, we use a special
28661 + * form for performance reason. On some architectures,
28662 + * such as x86, the pmds are already saved when entering
28663 + * the PMU interrupt handler via pfm_arch_intr_freeze()
28664 + * so we don't need to save them again. On the contrary,
28665 + * on IA-64, they are not saved by freeze, thus we have to
28668 + pfm_arch_save_pmds_from_intr(ctx, set);
28671 + * compute elapsed ns for active set
28673 + set->duration += now - set->duration_start;
28675 + pfm_arch_restore_pmds(ctx, new_set);
28678 + * must not be restored active as we are still executing in the
28679 + * PMU interrupt handler. Activation is deferred until the PMU is unfrozen.
28681 + pfm_arch_restore_pmcs(ctx, new_set);
28684 + * check for pending interrupt on incoming set.
28685 + * interrupts are masked so handler call deferred
28687 + if (new_set->npend_ovfls) {
28688 + pfm_arch_resend_irq(ctx);
28689 + pfm_stats_inc(ovfl_intr_replay_count);
28692 + * no need to restore anything, that is already done
28694 + new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
28696 + * reset duration counter
28698 + new_set->duration_start = now;
28704 + * reset switch threshold
28706 + if (new_flags & PFM_SETFL_OVFL_SWITCH)
28707 + pfm_reload_switch_thresholds(ctx, new_set);
28710 + * reset overflowed PMD registers
28712 + n = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd);
28714 + pfm_reset_pmds(ctx, new_set, n, PFM_PMD_RESET_SHORT);
28719 + * We came here following an interrupt which triggered a switch, i.e.,
28720 + * the previous set was using OVFL_SWITCH. Thus we just need to
28721 + * check if the next set is using a timeout and, if so, arm the timer.
28723 + * Timeout is always at least one tick away. No risk of having to
28724 + * invoke the timeout handler right now. In any case, cb_mode is
28725 + * set to HRTIMER_CB_IRQSAFE_NO_SOFTIRQ such that hrtimer_start
28726 + * will not try to wakeup the softirqd which could cause a locking
28729 + if (new_flags & PFM_SETFL_TIME_SWITCH) {
28730 + hrtimer_start(&__get_cpu_var(pfm_hrtimer), set->hrtimer_exp, HRTIMER_MODE_REL);
28731 + PFM_DBG("armed new timeout for set%u", new_set->id);
28734 + ctx->active_set = new_set;
28736 + end = sched_clock();
28738 + pfm_stats_inc(set_switch_count);
28739 + pfm_stats_add(set_switch_ns, end - now);
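
Both switch paths above pick the next set in a round-robin fashion: the entry following the current one, wrapping back to the head of ctx->set_list when the end of the list is reached. A standalone, array-based sketch of that wrap-around (hypothetical names, no kernel list primitives) is:

#include <stdio.h>

struct evt_set { unsigned int id; };

static struct evt_set *next_set(struct evt_set *sets, int nsets, struct evt_set *cur)
{
	int idx = (int)(cur - sets);
	return &sets[(idx + 1) % nsets];	/* wrap around like the kernel list walk */
}

int main(void)
{
	struct evt_set sets[3] = { {0}, {1}, {2} };
	struct evt_set *cur = &sets[2];

	cur = next_set(sets, 3, cur);
	printf("next set: %u\n", cur->id);	/* prints 0: wrapped back to set0 */
	return 0;
}
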
28743 +static int pfm_setfl_sane(struct pfm_context *ctx, u32 flags)
28745 +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH)
28748 + ret = pfm_arch_setfl_sane(ctx, flags);
28752 + if ((flags & PFM_SETFL_BOTH_SWITCH) == PFM_SETFL_BOTH_SWITCH) {
28753 + PFM_DBG("both switch ovfl and switch time are set");
28760 + * it is never possible to change the identification of an existing set
28762 +static int pfm_change_evtset(struct pfm_context *ctx,
28763 + struct pfm_event_set *set,
28764 + struct pfarg_setdesc *req)
28766 + struct timeval tv;
28767 + struct timespec ts;
28775 + BUG_ON(ctx->state == PFM_CTX_LOADED);
28777 + set_id = req->set_id;
28778 + flags = req->set_flags;
28780 + ret = pfm_setfl_sane(ctx, flags);
28782 + PFM_DBG("invalid flags 0x%x set %u", flags, set_id);
28787 + * compute timeout value
28789 + if (flags & PFM_SETFL_TIME_SWITCH) {
28791 + * timeout value of zero is illegal
28793 + if (req->set_timeout == 0) {
28794 + PFM_DBG("invalid timeout 0");
28798 + hrtimer_get_res(CLOCK_MONOTONIC, &ts);
28799 + res_ns = (long)ktime_to_ns(timespec_to_ktime(ts));
28802 + * round-up to multiple of clock resolution
28803 + * timeout = ((req->set_timeout+res_ns-1)/res_ns)*res_ns;
28805 + * u64 division missing on 32-bit arch, so use div_s64_rem
28807 + d = div_s64_rem(req->set_timeout, res_ns, &rem);
28809 + PFM_DBG("set%u flags=0x%x req_timeout=%lluns "
28810 + "HZ=%u TICK_NSEC=%lu clock_res=%ldns rem=%dns",
28813 + (unsigned long long)req->set_timeout,
28819 + * Only accept timeouts we can actually achieve.
28820 + * users can invoke clock_getres(CLOCK_MONOTONIC)
28821 + * to figure out resolution and adjust timeout
28824 + PFM_DBG("set%u invalid timeout=%llu",
28826 + (unsigned long long)req->set_timeout);
28830 + tv = ns_to_timeval(req->set_timeout);
28831 + kt = timeval_to_ktime(tv);
28832 + set->hrtimer_exp = kt;
28834 + set->hrtimer_exp = ktime_set(0, 0);
28840 + set->id = set_id;
28841 + set->flags = flags;
28842 + set->priv_flags = 0;
28845 + * activation and duration counters are reset as
28846 + * most likely major things will change in the set
28849 + set->duration = 0;
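
The timeout handling above divides the requested value by the clock resolution with div_s64_rem(); as the surrounding comments indicate, only timeouts that are an exact multiple of that resolution appear to be accepted. A small user-space sketch of that check, with made-up values and hypothetical names:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t res_ns      = 1000000;	/* assume a 1 ms clock resolution */
	int64_t req_timeout = 2500000;	/* user asks for 2.5 ms           */

	int64_t ticks = req_timeout / res_ns;
	int64_t rem   = req_timeout % res_ns;

	if (rem)	/* not an exact multiple of the resolution: reject */
		printf("timeout %lld ns rejected (resolution %lld ns)\n",
		       (long long)req_timeout, (long long)res_ns);
	else
		printf("timeout accepted: %lld ticks of %lld ns\n",
		       (long long)ticks, (long long)res_ns);
	return 0;
}
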
28855 + * this function does not modify the next field
28857 +static void pfm_initialize_set(struct pfm_context *ctx,
28858 + struct pfm_event_set *set)
28863 + max_pmc = ctx->regs.max_pmc;
28864 + impl_pmcs = ctx->regs.pmcs;
28867 + * install default values for all PMC registers
28869 + for (i = 0; i < max_pmc; i++) {
28870 + if (test_bit(i, cast_ulp(impl_pmcs))) {
28871 + set->pmcs[i] = pfm_pmu_conf->pmc_desc[i].dfl_val;
28872 + PFM_DBG("set%u pmc%u=0x%llx",
28875 + (unsigned long long)set->pmcs[i]);
28880 + * PMD registers are set to 0 when the event set is allocated,
28881 + * hence we do not need to explicitly initialize them.
28883 + * For virtual PMD registers (i.e., those tied to a SW resource)
28884 + * their value becomes meaningful once the context is attached.
28889 + * look for an event set using its identification. If the set does not
28890 + * exist:
28891 + * - if alloc == 0 then return error
28892 + * - if alloc == 1 then allocate set
28894 + * alloc is one ONLY when coming from pfm_create_evtsets() which can only
28895 + * be called when the context is detached, i.e. monitoring is stopped.
28897 +struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, int alloc)
28899 + struct pfm_event_set *set = NULL, *prev, *new_set;
28901 + PFM_DBG("looking for set=%u", set_id);
28904 + list_for_each_entry(set, &ctx->set_list, list) {
28905 + if (set->id == set_id)
28907 + if (set->id > set_id)
28916 + * we are holding the context spinlock and interrupts
28917 + * are unmasked. We must use GFP_ATOMIC as we cannot
28918 + * sleep while holding a spin lock.
28920 + new_set = kmem_cache_zalloc(pfm_set_cachep, GFP_ATOMIC);
28924 + new_set->id = set_id;
28926 + INIT_LIST_HEAD(&new_set->list);
28928 + if (prev == NULL) {
28929 + list_add(&(new_set->list), &ctx->set_list);
28931 + PFM_DBG("add after set=%u", prev->id);
28932 + list_add(&(new_set->list), &prev->list);
28938 + * pfm_create_initial_set - create initial set from __pfm_create_context
28939 + * @ctx: context to attach the set to
28941 +int pfm_create_initial_set(struct pfm_context *ctx)
28943 + struct pfm_event_set *set;
28946 + * create initial set0
28948 + if (!pfm_find_set(ctx, 0, 1))
28951 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28953 + pfm_initialize_set(ctx, set);
28959 + * context is unloaded for this command. Interrupts are enabled
28961 +int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req,
28964 + struct pfm_event_set *set;
28968 + for (i = 0; i < count; i++, req++) {
28969 + set_id = req->set_id;
28971 + PFM_DBG("set_id=%u", set_id);
28973 + set = pfm_find_set(ctx, set_id, 1);
28977 + ret = pfm_change_evtset(ctx, set, req);
28979 + goto error_params;
28981 + pfm_initialize_set(ctx, set);
28985 + PFM_DBG("cannot allocate set %u", set_id);
28991 +int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req,
28994 + struct pfm_event_set *set;
28995 + int i, is_system, is_loaded, is_self, ret;
28999 + end = sched_clock();
29001 + is_system = ctx->flags.system;
29002 + is_loaded = ctx->state == PFM_CTX_LOADED;
29003 + is_self = ctx->task == current || is_system;
29006 + for (i = 0; i < count; i++, req++) {
29008 + set_id = req->set_id;
29010 + list_for_each_entry(set, &ctx->set_list, list) {
29011 + if (set->id == set_id)
29013 + if (set->id > set_id)
29017 + req->set_flags = set->flags;
29020 + * compute leftover timeout
29022 + * lockdep may complain about lock inversion
29023 + * because of get_remaining(); however, this
29024 + * applies to self-monitoring only, thus the
29025 + * thread cannot be in the timeout handler
29026 + * and here at the same time given that we
29027 + * run with interrupts disabled
29029 + if (is_loaded && is_self) {
29030 + struct hrtimer *h;
29031 + h = &__get_cpu_var(pfm_hrtimer);
29032 + req->set_timeout = ktime_to_ns(hrtimer_get_remaining(h));
29035 + * hrtimer_rem zero when not using
29036 + * timeout-based switching
29038 + req->set_timeout = ktime_to_ns(set->hrtimer_rem);
29041 + req->set_runs = set->runs;
29042 + req->set_act_duration = set->duration;
29045 + * adjust for active set if needed
29047 + if (is_system && is_loaded && ctx->flags.started
29048 + && set == ctx->active_set)
29049 + req->set_act_duration += end - set->duration_start;
29052 + * copy the list of pmds which last overflowed
29054 + bitmap_copy(cast_ulp(req->set_ovfl_pmds),
29055 + cast_ulp(set->ovfl_pmds),
29059 + * copy bitmask of available PMU registers
29061 + * must copy over the entire vector to avoid
29062 + * returning bogus upper bits passed in by the user
29064 + bitmap_copy(cast_ulp(req->set_avail_pmcs),
29065 + cast_ulp(ctx->regs.pmcs),
29068 + bitmap_copy(cast_ulp(req->set_avail_pmds),
29069 + cast_ulp(ctx->regs.pmds),
29072 + PFM_DBG("set%u flags=0x%x eff_usec=%llu runs=%llu "
29073 + "a_pmcs=0x%llx a_pmds=0x%llx",
29076 + (unsigned long long)req->set_timeout,
29077 + (unsigned long long)set->runs,
29078 + (unsigned long long)ctx->regs.pmcs[0],
29079 + (unsigned long long)ctx->regs.pmds[0]);
29087 + * context is unloaded for this command. Interrupts are enabled
29089 +int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count)
29091 + struct pfarg_setdesc *req = arg;
29092 + struct pfm_event_set *set;
29097 + for (i = 0; i < count; i++, req++) {
29098 + set_id = req->set_id;
29100 + list_for_each_entry(set, &ctx->set_list, list) {
29101 + if (set->id == set_id)
29103 + if (set->id > set_id)
29109 + * clear active set if necessary.
29110 + * will be updated when context is loaded
29112 + if (set == ctx->active_set)
29113 + ctx->active_set = NULL;
29115 + list_del(&set->list);
29117 + kmem_cache_free(pfm_set_cachep, set);
29119 + PFM_DBG("set%u deleted", set_id);
29127 + * called from pfm_context_free() to free all sets
29129 +void pfm_free_sets(struct pfm_context *ctx)
29131 + struct pfm_event_set *set, *tmp;
29133 + list_for_each_entry_safe(set, tmp, &ctx->set_list, list) {
29134 + list_del(&set->list);
29135 + kmem_cache_free(pfm_set_cachep, set);
29140 + * pfm_restart_timer - restart hrtimer taking care of expired timeout
29141 + * @ctx : context to work with
29142 + * @set : current active set
29144 + * Must be called on the processor on which the timer is to be armed.
29145 + * Assumes context is locked and interrupts are masked
29147 + * Upon return the active set for the context may have changed
29149 +void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set)
29151 + struct hrtimer *h;
29152 + enum hrtimer_restart ret;
29154 + h = &__get_cpu_var(pfm_hrtimer);
29156 + PFM_DBG_ovfl("hrtimer=%lld", (long long)ktime_to_ns(set->hrtimer_rem));
29158 + if (ktime_to_ns(set->hrtimer_rem) > 0) {
29159 + hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL);
29162 + * timer was not re-armed because it has already expired
29163 + * timer was not enqueued, we need to switch set now
29165 + pfm_stats_inc(set_switch_exp);
29167 + ret = pfm_switch_sets(ctx, NULL, 1, 0);
29168 + set = ctx->active_set;
29169 + if (ret == HRTIMER_RESTART)
29170 + hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL);
29174 +int __init pfm_init_sets(void)
29176 + pfm_set_cachep = kmem_cache_create("pfm_event_set",
29177 + sizeof(struct pfm_event_set),
29178 + SLAB_HWCACHE_ALIGN, 0, NULL);
29179 + if (!pfm_set_cachep) {
29180 + PFM_ERR("cannot initialize event set slab");
29185 diff --git a/perfmon/perfmon_smpl.c b/perfmon/perfmon_smpl.c
29186 new file mode 100644
29187 index 0000000..e31fb15
29189 +++ b/perfmon/perfmon_smpl.c
29192 + * perfmon_smpl.c: perfmon2 sampling management
29194 + * This file implements the perfmon2 interface which
29195 + * provides access to the hardware performance counters
29196 + * of the host processor.
29199 + * The initial version of perfmon.c was written by
29200 + * Ganesh Venkitachalam, IBM Corp.
29202 + * Then it was modified for perfmon-1.x by Stephane Eranian and
29203 + * David Mosberger, Hewlett Packard Co.
29205 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
29206 + * by Stephane Eranian, Hewlett Packard Co.
29208 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
29209 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
29210 + * David Mosberger-Tang <davidm@hpl.hp.com>
29212 + * More information about perfmon available at:
29213 + * http://perfmon2.sf.net
29215 + * This program is free software; you can redistribute it and/or
29216 + * modify it under the terms of version 2 of the GNU General Public
29217 + * License as published by the Free Software Foundation.
29219 + * This program is distributed in the hope that it will be useful,
29220 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
29221 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29222 + * General Public License for more details.
29224 + * You should have received a copy of the GNU General Public License
29225 + * along with this program; if not, write to the Free Software
29226 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29229 +#include <linux/module.h>
29230 +#include <linux/kernel.h>
29231 +#include <linux/vmalloc.h>
29232 +#include <linux/fs.h>
29233 +#include <linux/mm.h>
29234 +#include <linux/random.h>
29235 +#include <linux/uaccess.h>
29236 +#include <linux/perfmon_kern.h>
29238 +#include "perfmon_priv.h"
29241 + * pfm_smpl_buf_alloc - allocate memory for sampling buffer
29242 + * @ctx: context to operate on
29243 + * @rsize: requested size
29245 + * called from pfm_smpl_buffer_alloc_old() (IA64-COMPAT)
29246 + * and pfm_setup_smpl_fmt()
29248 + * interrupts are enabled, context is not locked.
29250 + * function is not static because it is called from the IA-64
29251 + * compatibility module (perfmon_compat.c)
29253 +int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize)
29255 +#if PFM_ARCH_SMPL_ALIGN_SIZE > 0
29256 +#define PFM_ALIGN_SMPL(a, f) (void *)((((unsigned long)(a))+(f-1)) & ~(f-1))
29258 +#define PFM_ALIGN_SMPL(a, f) (a)
29260 + void *addr, *real_addr;
29261 + size_t size, real_size;
29267 + * align page boundary
29269 + size = PAGE_ALIGN(rsize);
29272 + * On some arch, it may be necessary to get an alignment greater
29273 + * than page size to avoid certain cache effects (e.g., MIPS).
29274 + * This is the reason for PFM_ARCH_SMPL_ALIGN_SIZE.
29276 + real_size = size + PFM_ARCH_SMPL_ALIGN_SIZE;
29278 + PFM_DBG("req_size=%zu size=%zu real_size=%zu",
29283 + ret = pfm_smpl_buf_space_acquire(ctx, real_size);
29288 + * vmalloc can sleep. we do not hold
29289 + * any spinlock and interrupts are enabled
29291 + real_addr = addr = vmalloc(real_size);
29292 + if (!real_addr) {
29293 + PFM_DBG("cannot allocate sampling buffer");
29298 + * align the useable sampling buffer address to the arch requirement
29299 + * This is a nop on most architectures
29301 + addr = PFM_ALIGN_SMPL(real_addr, PFM_ARCH_SMPL_ALIGN_SIZE);
29303 + memset(addr, 0, real_size);
29306 + * due to cache aliasing, it may be necessary to flush the pages
29307 + * on certain architectures (e.g., MIPS)
29309 + pfm_cacheflush(addr, real_size);
29312 + * what needs to be freed
29314 + ctx->smpl_real_addr = real_addr;
29315 + ctx->smpl_real_size = real_size;
29318 + * what is actually available to user
29320 + ctx->smpl_addr = addr;
29321 + ctx->smpl_size = size;
29323 + PFM_DBG("addr=%p real_addr=%p", addr, real_addr);
29328 + * smpl_addr is NULL, no double freeing possible in pfm_context_free()
29330 + pfm_smpl_buf_space_release(ctx, real_size);
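
The PFM_ALIGN_SMPL() macro used above is the usual power-of-two round-up. The stand-alone sketch below shows the same arithmetic outside the kernel; the address and the 16KB alignment value are made up for illustration and are not a statement about any particular architecture.

#include <stdio.h>

/* same arithmetic as PFM_ALIGN_SMPL(): round addr up to a multiple of
 * align, assuming align is a power of two */
static unsigned long align_up(unsigned long addr, unsigned long align)
{
	return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
	unsigned long real_addr = 0x12345678UL;	/* hypothetical vmalloc() result */

	printf("aligned=0x%lx\n", align_up(real_addr, 16384));
	return 0;
}
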
29336 + * pfm_smpl_buf_free - free resources associated with sampling
29337 + * @ctx: context to operate on
29339 +void pfm_smpl_buf_free(struct pfm_context *ctx)
29341 + struct pfm_smpl_fmt *fmt;
29343 + fmt = ctx->smpl_fmt;
29346 + * some formats may not use a buffer, yet they may
29347 + * need to be called on exit
29350 + if (fmt->fmt_exit)
29351 + (*fmt->fmt_exit)(ctx->smpl_addr);
29353 + * decrease refcount of sampling format
29355 + pfm_smpl_fmt_put(fmt);
29358 + if (ctx->smpl_addr) {
29359 + pfm_smpl_buf_space_release(ctx, ctx->smpl_real_size);
29361 + PFM_DBG("free buffer real_addr=0x%p real_size=%zu",
29362 + ctx->smpl_real_addr,
29363 + ctx->smpl_real_size);
29365 + vfree(ctx->smpl_real_addr);
29370 + * pfm_setup_smpl_fmt - initialization of sampling format and buffer
29371 + * @ctx: context to operate on
29372 + * @fmt_arg: sampling format arguments
29373 + * @ctx_flags: context flags as passed by user
29374 + * @filp: file descriptor associated with context
29376 + * called from __pfm_create_context()
29378 +int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg,
29379 + struct file *filp)
29381 + struct pfm_smpl_fmt *fmt;
29385 + fmt = ctx->smpl_fmt;
29388 + * validate parameters
29390 + if (fmt->fmt_validate) {
29391 + ret = (*fmt->fmt_validate)(ctx_flags,
29392 + ctx->regs.num_pmds,
29394 + PFM_DBG("validate(0x%x,%p)=%d", ctx_flags, fmt_arg, ret);
29400 + * check if buffer format needs buffer allocation
29403 + if (fmt->fmt_getsize) {
29404 + ret = (*fmt->fmt_getsize)(ctx_flags, fmt_arg, &size);
29406 + PFM_DBG("cannot get size ret=%d", ret);
29412 + * allocate buffer
29413 + * v20_compat is for IA-64 backward compatibility with perfmon v2.0
29416 +#ifdef CONFIG_IA64_PERFMON_COMPAT
29418 + * backward compatibility with perfmon v2.0 on IA-64
29420 + if (ctx->flags.ia64_v20_compat)
29421 + ret = pfm_smpl_buf_alloc_compat(ctx, size, filp);
29424 + ret = pfm_smpl_buf_alloc(ctx, size);
29431 + if (fmt->fmt_init) {
29432 + ret = (*fmt->fmt_init)(ctx, ctx->smpl_addr, ctx_flags,
29433 + ctx->regs.num_pmds,
29437 + * if there was an error, the buffer/resource will be freed
29438 + * via pfm_context_free()
29444 +void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
29448 + now = sched_clock();
29451 + * we save the PMD values such that we can read them while
29452 + * MASKED without having the thread stopped
29453 + * because monitoring is stopped
29455 + * pfm_save_pmds() could be avoided if we knew
29456 + * that pfm_arch_intr_freeze() had saved them already
29458 + pfm_save_pmds(ctx, set);
29459 + pfm_arch_mask_monitoring(ctx, set);
29461 + * accumulate the set duration up to this point
29463 + set->duration += now - set->duration_start;
29465 + ctx->state = PFM_CTX_MASKED;
29468 + * need to stop timer and remember remaining time
29469 + * will be reloaded in pfm_unmask_monitoring
29470 + * hrtimer is cancelled in the tail of the interrupt
29471 + * handler once the context is unlocked
29473 + if (set->flags & PFM_SETFL_TIME_SWITCH) {
29474 + struct hrtimer *h = &__get_cpu_var(pfm_hrtimer);
29475 + hrtimer_cancel(h);
29476 + set->hrtimer_rem = hrtimer_get_remaining(h);
29478 + PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart);
29482 + * pfm_unmask_monitoring - unmask monitoring
29483 + * @ctx: context to work with
29484 + * @set: current active set
29486 + * interrupts are masked when entering this function.
29487 + * context must be in MASKED state when calling.
29489 + * Upon return, the active set may have changed when using timeout
29490 + * based switching.
29492 +static void pfm_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
29494 + if (ctx->state != PFM_CTX_MASKED)
29497 + PFM_DBG_ovfl("unmasking monitoring");
29500 + * must be done before calling
29501 + * pfm_arch_unmask_monitoring()
29503 + ctx->state = PFM_CTX_LOADED;
29506 + * we need to restore the PMDs because they
29507 + * may have been modified by user while MASKED in
29508 + * which case the actual registers have not yet
29511 + pfm_arch_restore_pmds(ctx, set);
29514 + * call arch specific handler
29516 + pfm_arch_unmask_monitoring(ctx, set);
29519 + * clear force reload flag. May have been set
29520 + * in pfm_write_pmcs or pfm_write_pmds
29522 + set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
29525 + * reset set duration timer
29527 + set->duration_start = sched_clock();
29530 + * restart hrtimer if needed
29532 + if (set->flags & PFM_SETFL_TIME_SWITCH) {
29533 + pfm_restart_timer(ctx, set);
29534 + /* careful here as pfm_restart_timer may switch sets */
29538 +void pfm_reset_pmds(struct pfm_context *ctx,
29539 + struct pfm_event_set *set,
29543 + u64 val, mask, new_seed;
29544 + struct pfm_pmd *reg;
29545 + unsigned int i, not_masked;
29547 + not_masked = ctx->state != PFM_CTX_MASKED;
29549 + PFM_DBG_ovfl("%s r_pmds=0x%llx not_masked=%d",
29550 + reset_mode == PFM_PMD_RESET_LONG ? "long" : "short",
29551 + (unsigned long long)set->reset_pmds[0],
29554 + pfm_stats_inc(reset_pmds_count);
29556 + for (i = 0; num_pmds; i++) {
29557 + if (test_bit(i, cast_ulp(set->reset_pmds))) {
29560 + reg = set->pmds + i;
29562 + val = reset_mode == PFM_PMD_RESET_LONG ?
29563 + reg->long_reset : reg->short_reset;
29565 + if (reg->flags & PFM_REGFL_RANDOM) {
29566 + mask = reg->mask;
29567 + new_seed = random32();
29569 + /* construct a full 64-bit random value: */
29570 + if ((unlikely(mask >> 32) != 0))
29571 + new_seed |= (u64)random32() << 32;
29573 + /* counter values are negative numbers! */
29574 + val -= (new_seed & mask);
29577 + set->pmds[i].value = val;
29581 + * not all PMDs to reset are necessarily
29585 + pfm_write_pmd(ctx, i, val);
29587 + PFM_DBG_ovfl("set%u pmd%u sval=0x%llx",
29590 + (unsigned long long)val);
29595 + * done with reset
29597 + bitmap_zero(cast_ulp(set->reset_pmds), i);
29600 + * make changes visible
29603 + pfm_arch_serialize();
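
Because the counters count upward toward overflow, the reset values are effectively negative numbers, so subtracting a bounded random quantity from them moves the next overflow closer by a random amount and thereby randomizes the sampling period. A stand-alone sketch of that single step follows; all constants are made up for illustration only.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* hypothetical reset value (a negative count, i.e. the period before
	 * overflow) and randomization mask, as configured per PMD */
	unsigned long long long_reset = 0xffffffffffe7960ULL;
	unsigned long long mask = 0xffffULL;

	/* same step as the PFM_REGFL_RANDOM case in pfm_reset_pmds() */
	unsigned long long seed = (unsigned long long)rand() & mask;
	unsigned long long val = long_reset - seed;

	printf("randomized reset value=0x%llx\n", val);
	return 0;
}
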
29607 + * called from pfm_handle_work() and __pfm_restart()
29608 + * for system-wide and per-thread context to resume
29609 + * monitoring after a user level notification.
29611 + * In both cases, the context is locked and interrupts
29614 +void pfm_resume_after_ovfl(struct pfm_context *ctx)
29616 + struct pfm_smpl_fmt *fmt;
29618 + struct pfm_event_set *set;
29623 + hdr = ctx->smpl_addr;
29624 + fmt = ctx->smpl_fmt;
29625 + state = ctx->state;
29626 + set = ctx->active_set;
29633 + rst_ctrl = PFM_OVFL_CTRL_RESET;
29637 + * if using a sampling buffer format and it has a restart callback,
29638 + * then invoke it. hdr may be NULL, if the format does not use a
29641 + if (fmt && fmt->fmt_restart)
29642 + ret = (*fmt->fmt_restart)(state == PFM_CTX_LOADED, &rst_ctrl,
29645 + reset_pmds = set->reset_pmds;
29647 + PFM_DBG("fmt_restart=%d reset_count=%d set=%u r_pmds=0x%llx switch=%d "
29650 + ctx->flags.reset_count,
29652 + (unsigned long long)reset_pmds[0],
29653 + (set->priv_flags & PFM_SETFL_PRIV_SWITCH),
29658 + * switch set if needed
29660 + if (set->priv_flags & PFM_SETFL_PRIV_SWITCH) {
29661 + set->priv_flags &= ~PFM_SETFL_PRIV_SWITCH;
29662 + pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_LONG, 0);
29663 + set = ctx->active_set;
29664 + } else if (rst_ctrl & PFM_OVFL_CTRL_RESET) {
29666 + nn = bitmap_weight(cast_ulp(set->reset_pmds),
29667 + ctx->regs.max_pmd);
29669 + pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_LONG);
29672 + if (!(rst_ctrl & PFM_OVFL_CTRL_MASK))
29673 + pfm_unmask_monitoring(ctx, set);
29675 + PFM_DBG("stopping monitoring?");
29676 + ctx->state = PFM_CTX_LOADED;
29681 + * This function is called when we need to perform asynchronous
29682 + * work on a context. This function is called ONLY when about to
29683 + * return to user mode (very much like with signal handling).
29685 + * There are several reasons why we come here:
29687 + * - per-thread mode, not self-monitoring, to reset the counters
29688 + * after a pfm_restart()
29690 + * - we are zombie and we need to cleanup our state
29692 + * - we need to block after an overflow notification
29693 + * on a context with the PFM_OVFL_NOTIFY_BLOCK flag
29695 + * This function is never called for a system-wide context.
29697 + * pfm_handle_work() can be called with interrupts enabled
29698 + * (TIF_NEED_RESCHED) or disabled. The wait_for_completion_interruptible()
29699 + * call may sleep, therefore we must re-enable interrupts
29700 + * to avoid deadlocks. It is safe to do so because this function
29701 + * is called ONLY when returning to user level, in which case
29702 + * there is no risk of kernel stack overflow due to deep
29703 + * interrupt nesting.
29705 +void pfm_handle_work(struct pt_regs *regs)
29707 + struct pfm_context *ctx;
29708 + unsigned long flags, dummy_flags;
29709 + int type, ret, info;
29713 + * This is just a temporary fix. Obviously we'd like to fix the powerpc
29714 + * code to make that check before calling __pfm_handle_work() to
29715 + * prevent the function call overhead, but the call is made from
29716 + * assembly code, so it will take a little while to figure out how to
29717 + * perform the check correctly.
29719 + if (!test_thread_flag(TIF_PERFMON_WORK))
29723 + if (!user_mode(regs))
29726 + clear_thread_flag(TIF_PERFMON_WORK);
29728 + pfm_stats_inc(handle_work_count);
29730 + ctx = current->pfm_context;
29731 + if (ctx == NULL) {
29732 + PFM_DBG("[%d] has no ctx", current->pid);
29736 + BUG_ON(ctx->flags.system);
29738 + spin_lock_irqsave(&ctx->lock, flags);
29740 + type = ctx->flags.work_type;
29741 + ctx->flags.work_type = PFM_WORK_NONE;
29743 + PFM_DBG("work_type=%d reset_count=%d",
29745 + ctx->flags.reset_count);
29748 + case PFM_WORK_ZOMBIE:
29750 + case PFM_WORK_RESET:
29751 + /* simply reset, no blocking */
29752 + goto skip_blocking;
29753 + case PFM_WORK_NONE:
29754 + PFM_DBG("unexpected PFM_WORK_NONE");
29755 + goto nothing_todo;
29756 + case PFM_WORK_BLOCK:
29759 + PFM_DBG("unknown type=%d", type);
29760 + goto nothing_todo;
29764 + * restore interrupt mask to what it was on entry.
29765 + * Could be enabled/disabled.
29767 + spin_unlock_irqrestore(&ctx->lock, flags);
29770 + * force interrupt enable because of wait_for_completion_interruptible()
29772 + local_irq_enable();
29774 + PFM_DBG("before block sleeping");
29777 + * may go through without blocking on SMP systems
29778 + * if restart has been received already by the time we wait on the completion
29780 + ret = wait_for_completion_interruptible(&ctx->restart_complete);
29782 + PFM_DBG("after block sleeping ret=%d", ret);
29785 + * lock context and mask interrupts again
29786 + * We save flags into a dummy because we may have
29787 + * altered interrupts mask compared to entry in this
29790 + spin_lock_irqsave(&ctx->lock, dummy_flags);
29792 + if (ctx->state == PFM_CTX_ZOMBIE)
29796 + * in case the wait is interrupted, we don't restart anything
29799 + goto nothing_todo;
29803 + * iterate over the number of pending resets
29804 + * There are certain situations where there may be
29805 + * multiple notifications sent before a pfm_restart().
29806 + * As such, it may be that multiple pfm_restart() are
29807 + * issued before the monitored thread gets to
29808 + * pfm_handle_work(). To avoid losing restarts, pfm_restart()
29809 + * increments a counter (reset_count). Here, we take this
29810 + * into account by potentially calling pfm_resume_after_ovfl()
29811 + * multiple times. It is up to the sampling format to take the
29812 + * appropriate actions.
29814 + while (ctx->flags.reset_count) {
29815 + pfm_resume_after_ovfl(ctx);
29816 + /* careful as active set may have changed */
29817 + ctx->flags.reset_count--;
29822 + * restore flags as they were upon entry
29824 + spin_unlock_irqrestore(&ctx->lock, flags);
29828 + PFM_DBG("context is zombie, bailing out");
29830 + __pfm_unload_context(ctx, &info);
29833 + * keep the spinlock check happy
29835 + spin_unlock(&ctx->lock);
29838 + * enable interrupt for vfree()
29840 + local_irq_enable();
29843 + * cancel timer now that context is unlocked
29845 + if (info & 0x2) {
29846 + ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
29847 + PFM_DBG("timeout cancel=%d", ret);
29851 + * actual context free
29853 + pfm_free_context(ctx);
29856 + * restore interrupts as they were upon entry
29858 + local_irq_restore(flags);
29860 + /* always true */
29862 + pfm_session_release(0, 0);
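
pfm_post_work() itself is not part of this hunk; the sketch below only illustrates the contract that pfm_handle_work() appears to rely on, namely that the poster records the kind of deferred work on the context and flags the monitored thread so that it drops into pfm_handle_work() on its way back to user mode. Treat this as an assumption about the helper, not as its actual implementation.

/* illustrative only: assumed shape of pfm_post_work(), see caveat above */
static void pfm_post_work_sketch(struct task_struct *task,
				 struct pfm_context *ctx, int type)
{
	ctx->flags.work_type = type;			/* consumed by pfm_handle_work() */
	set_tsk_thread_flag(task, TIF_PERFMON_WORK);	/* checked on return to user */
}
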
29866 + * __pfm_restart - resume monitoring after user-level notification
29867 + * @ctx: context to operate on
29868 + * @info: return information used to free resource once unlocked
29870 + * function called from sys_pfm_restart(). It is used when overflow
29871 + * notification is requested. For each notification received, the user
29872 + * must call pfm_restart() to indicate to the kernel that it is done
29873 + * processing the notification.
29875 + * When the caller is doing user level sampling, this function resets
29876 + * the overflowed counters and resumes monitoring which is normally stopped
29877 + * during notification (always the consequence of a counter overflow).
29879 + * When using a sampling format, the format restart() callback is invoked,
29880 + * overflowed PMDs may be reset based upon the decision of the sampling format.
29882 + * When operating in per-thread mode, and when not self-monitoring, the
29883 + * monitored thread DOES NOT need to be stopped, unlike for many other calls.
29885 + * This means that the effect of the restart may not necessarily be observed
29886 + * right when returning from the call. For instance, counters may not already
29887 + * be reset in the other thread.
29889 + * When operating in system-wide, the caller must be running on the monitored
29892 + * The context is locked and interrupts are disabled.
29894 + * info value upon return:
29895 + * - bit 0: when set, must issue complete() on restart semaphore
29897 +int __pfm_restart(struct pfm_context *ctx, int *info)
29901 + state = ctx->state;
29903 + PFM_DBG("state=%d can_restart=%d reset_count=%d",
29905 + ctx->flags.can_restart,
29906 + ctx->flags.reset_count);
29911 + case PFM_CTX_MASKED:
29913 + case PFM_CTX_LOADED:
29914 + if (ctx->smpl_addr && ctx->smpl_fmt->fmt_restart)
29917 + PFM_DBG("invalid state=%d", state);
29922 + * first check if allowed to restart, i.e., notifications received
29924 + if (!ctx->flags.can_restart) {
29925 + PFM_DBG("no restart can_restart=0");
29929 + pfm_stats_inc(pfm_restart_count);
29932 + * at this point, the context is either LOADED or MASKED
29934 + ctx->flags.can_restart--;
29937 + * handle self-monitoring case and system-wide
29939 + if (ctx->task == current || ctx->flags.system) {
29940 + pfm_resume_after_ovfl(ctx);
29945 + * restart another task
29949 + * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
29950 + * the task is blocked or on its way to block. That's the normal
29951 + * restart path. If the monitoring is not masked, then the task
29952 + * can be actively monitoring and we cannot directly intervene.
29953 + * Therefore we use the trap mechanism to catch the task and
29954 + * force it to reset the buffer/reset PMDs.
29956 + * if non-blocking, then we ensure that the task will go into
29957 + * pfm_handle_work() before returning to user mode.
29959 + * We cannot explicitly reset another task, it MUST always
29960 + * be done by the task itself. This works for system wide because
29961 + * the tool that is controlling the session is logically doing
29962 + * "self-monitoring".
29964 + if (ctx->flags.block && state == PFM_CTX_MASKED) {
29965 + PFM_DBG("unblocking [%d]", ctx->task->pid);
29967 + * It is not possible to call complete() with the context locked
29968 + * otherwise we have a potential deadlock with the PMU context
29969 + * switch code due to a lock inversion between task_rq_lock()
29970 + * and the context lock.
29971 + * Instead we mark whether or not we need to issue the complete
29972 + * and we invoke the function once the context lock is released
29973 + * in sys_pfm_restart()
29977 + PFM_DBG("[%d] armed exit trap", ctx->task->pid);
29978 + pfm_post_work(ctx->task, ctx, PFM_WORK_RESET);
29980 + ctx->flags.reset_count++;
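
Seen from user level, the protocol described above is: receive an overflow notification, process the samples, then issue the restart call so the kernel resets the overflowed counters and resumes monitoring. A hedged user-space sketch follows, assuming the common perfmon2 convention of a notification message readable on the context file descriptor and a pfm_restart() wrapper around sys_pfm_restart(); both the message handling and the wrapper name are assumptions, not taken from this patch.

#include <unistd.h>

extern int pfm_restart(int fd);	/* assumed thin wrapper around sys_pfm_restart() */

/* hypothetical user-level notification loop */
static void notification_loop(int ctx_fd)
{
	char msg[64];	/* placeholder for the notification message */

	for (;;) {
		if (read(ctx_fd, msg, sizeof(msg)) <= 0)
			break;
		/* ... process the sampling buffer here ... */
		if (pfm_restart(ctx_fd))	/* tell the kernel we are done */
			break;
	}
}
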
29985 + * pfm_get_smpl_arg -- copy user arguments to pfm_create_context() related to sampling format
29986 + * @name: format name as passed by user
29987 + * @fmt_arg: optional format argument as passed by user
29988 + * @usize: size of structure passed in fmt_arg
29989 + * @arg: kernel copy of fmt_arg
29990 + * @fmt: pointer to sampling format upon success
29992 + * arg is kmalloc'ed, thus it needs a kfree by caller
29994 +int pfm_get_smpl_arg(char __user *fmt_uname, void __user *fmt_uarg, size_t usize, void **arg,
29995 + struct pfm_smpl_fmt **fmt)
29997 + struct pfm_smpl_fmt *f;
29999 + void *addr = NULL;
30003 + fmt_name = getname(fmt_uname);
30005 + PFM_DBG("getname failed");
30010 + * find fmt and increase refcount
30012 + f = pfm_smpl_fmt_get(fmt_name);
30014 + putname(fmt_name);
30017 + PFM_DBG("buffer format not found");
30022 + * expected format argument size
30024 + sz = f->fmt_arg_size;
30027 + * check user size matches expected size
30028 + * usize = -1 is for IA-64 backward compatibility
30031 + if (sz != usize && usize != -1) {
30032 + PFM_DBG("invalid arg size %zu, format expects %zu",
30039 + addr = kmalloc(sz, GFP_KERNEL);
30040 + if (addr == NULL)
30044 + if (copy_from_user(addr, fmt_uarg, sz))
30053 + pfm_smpl_fmt_put(f);
30056 diff --git a/perfmon/perfmon_syscalls.c b/perfmon/perfmon_syscalls.c
30057 new file mode 100644
30058 index 0000000..8777b58
30060 +++ b/perfmon/perfmon_syscalls.c
30063 + * perfmon_syscalls.c: perfmon2 system call interface
30065 + * This file implements the perfmon2 interface which
30066 + * provides access to the hardware performance counters
30067 + * of the host processor.
30069 + * The initial version of perfmon.c was written by
30070 + * Ganesh Venkitachalam, IBM Corp.
30072 + * Then it was modified for perfmon-1.x by Stephane Eranian and
30073 + * David Mosberger, Hewlett Packard Co.
30075 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
30076 + * by Stephane Eranian, Hewlett Packard Co.
30078 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
30079 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
30080 + * David Mosberger-Tang <davidm@hpl.hp.com>
30082 + * More information about perfmon available at:
30083 + * http://perfmon2.sf.net
30085 + * This program is free software; you can redistribute it and/or
30086 + * modify it under the terms of version 2 of the GNU General Public
30087 + * License as published by the Free Software Foundation.
30089 + * This program is distributed in the hope that it will be useful,
30090 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
30091 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30092 + * General Public License for more details.
30094 + * You should have received a copy of the GNU General Public License
30095 + * along with this program; if not, write to the Free Software
30096 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30099 +#include <linux/kernel.h>
30100 +#include <linux/fs.h>
30101 +#include <linux/ptrace.h>
30102 +#include <linux/perfmon_kern.h>
30103 +#include <linux/uaccess.h>
30104 +#include "perfmon_priv.h"
30107 + * Context locking rules:
30108 + * ---------------------
30109 + * - any thread with access to the file descriptor of a context can
30110 + * potentially issue perfmon calls
30112 + * - calls must be serialized to guarantee correctness
30114 + * - as soon as a context is attached to a thread or CPU, it may be
30115 + * actively monitoring. On some architectures, such as IA-64, this
30116 + * is true even though the pfm_start() call has not been made. This
30117 + * comes from the fact that on some architectures, it is possible to
30118 + * start/stop monitoring from userland.
30120 + * - If monitoring is active, then there can be PMU interrupts. Because
30121 + * context accesses must be serialized, the perfmon system calls
30122 + * must mask interrupts as soon as the context is attached.
30124 + * - perfmon system calls that operate with the context unloaded cannot
30125 + * assume it is actually unloaded when they are called. They first need
30126 + * to check and for that they need interrupts masked. Then, if the
30127 + * context is actually unloaded, they can unmask interrupts.
30129 + * - interrupt masking holds true for other internal perfmon functions as
30130 + * well. Except for PMU interrupt handler because those interrupts
30131 + * cannot be nested.
30133 + * - we mask ALL interrupts instead of just the PMU interrupt because we
30134 + * also need to protect against timer interrupts which could trigger
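
These rules translate into a fixed skeleton that the system calls below all follow: resolve the context from the file descriptor, take the context lock with all interrupts masked, validate the task/context state, run the command, then unlock, resume the target task if it had to be stopped, and drop the file reference. A condensed, illustrative version of that skeleton is shown here; __pfm_do_something() is a placeholder, not a real function.

/* condensed skeleton of the per-syscall locking pattern used below */
asmlinkage long sys_pfm_example(int fd)
{
	struct pfm_context *ctx;
	struct pfm_syscall_cookie cookie;
	struct task_struct *task;
	unsigned long flags;
	void *resume;
	int ret;

	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
	if (ret)
		return ret;

	spin_lock_irqsave(&ctx->lock, flags);	/* mask ALL interrupts */

	task = ctx->task;
	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
	if (!ret)
		ret = __pfm_do_something(ctx);	/* command-specific work */

	spin_unlock_irqrestore(&ctx->lock, flags);

	if (resume)
		pfm_resume_task(task, resume);	/* release the ptrace/utrace hold */

	pfm_release_ctx_from_fd(&cookie);
	return ret;
}
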
30137 +#ifdef CONFIG_UTRACE
30138 +#include <linux/utrace.h>
30141 +stopper_quiesce(struct utrace_attached_engine *engine, struct task_struct *tsk)
30143 + PFM_DBG("quiesced [%d]", tsk->pid);
30144 + complete(engine->data);
30145 + return UTRACE_ACTION_RESUME;
30149 +pfm_resume_task(struct task_struct *t, void *data)
30151 + PFM_DBG("utrace detach [%d]", t->pid);
30152 + (void) utrace_detach(t, data);
30155 +static const struct utrace_engine_ops utrace_ops =
30157 + .report_quiesce = stopper_quiesce,
30160 +static int pfm_wait_task_stopped(struct task_struct *task, void **data)
30162 + DECLARE_COMPLETION_ONSTACK(done);
30163 + struct utrace_attached_engine *eng;
30166 + eng = utrace_attach(task, UTRACE_ATTACH_CREATE, &utrace_ops, &done);
30168 + return PTR_ERR(eng);
30170 + ret = utrace_set_flags(task, eng,
30171 + UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE));
30172 + PFM_DBG("wait quiesce [%d]", task->pid);
30174 + ret = wait_for_completion_interruptible(&done);
30177 + (void) utrace_detach(task, eng);
30182 +#else /* !CONFIG_UTRACE */
30183 +static int pfm_wait_task_stopped(struct task_struct *task, void **data)
30190 + * returns 0 if cannot attach
30192 + ret = ptrace_may_access(task, PTRACE_MODE_ATTACH);
30193 + PFM_DBG("may_attach=%d", ret);
30197 + ret = ptrace_check_attach(task, 0);
30198 + PFM_DBG("check_attach=%d", ret);
30201 +void pfm_resume_task(struct task_struct *t, void *data)
30205 +struct pfm_syscall_cookie {
30206 + struct file *filp;
30211 + * cannot attach if:
30213 + * - task not owned by caller (checked by ptrace_may_access())
30214 + * - task is dead or zombie
30215 + * - cannot use blocking notification when self-monitoring
30217 +static int pfm_task_incompatible(struct pfm_context *ctx,
30218 + struct task_struct *task)
30221 + * cannot attach to a kernel thread
30224 + PFM_DBG("cannot attach to kernel thread [%d]", task->pid);
30229 + * cannot use block on notification when
30230 + * self-monitoring.
30232 + if (ctx->flags.block && task == current) {
30233 + PFM_DBG("cannot use block on notification when self-monitoring "
30234 + "[%d]", task->pid);
30238 + * cannot attach to a zombie task
30240 + if (task->exit_state == EXIT_ZOMBIE || task->exit_state == EXIT_DEAD) {
30241 + PFM_DBG("cannot attach to zombie/dead task [%d]", task->pid);
30248 + * pfm_get_task -- check permission and acquire task to monitor
30249 + * @ctx: perfmon context
30250 + * @pid: identification of the task to check
30251 + * @task: upon return, a pointer to the task to monitor
30253 + * This function is used in per-thread mode only AND when not
30254 + * self-monitoring. It finds the task to monitor and checks
30255 + * that the caller has permissions to attach. It also checks
30256 + * that the task is stopped via ptrace so that we can safely
30257 + * modify its state.
30259 + * task refcount is incremented when successful.
30261 +static int pfm_get_task(struct pfm_context *ctx, pid_t pid,
30262 + struct task_struct **task, void **data)
30264 + struct task_struct *p;
30265 + int ret = 0, ret1 = 0;
30270 + * When attaching to another thread we must ensure
30271 + * that the thread is actually stopped.
30273 + * As a consequence, only the ptracing parent can actually
30274 + * attach a context to a thread. Obviously, this constraint
30275 + * does not exist for self-monitoring threads.
30277 + * We use ptrace_may_access() to check for permission.
30279 + read_lock(&tasklist_lock);
30281 + p = find_task_by_vpid(pid);
30283 + get_task_struct(p);
30285 + read_unlock(&tasklist_lock);
30288 + PFM_DBG("task not found %d", pid);
30292 + ret = pfm_task_incompatible(ctx, p);
30296 + ret = pfm_wait_task_stopped(p, data);
30304 + if (!(ret1 || ret))
30307 + put_task_struct(p);
30313 + * context must be locked when calling this function
30315 +int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
30316 + unsigned long *flags, void **resume)
30318 + struct task_struct *task;
30319 + unsigned long local_flags, new_flags;
30326 + * task is NULL for system-wide context
30328 + task = ctx->task;
30329 + state = ctx->state;
30330 + local_flags = *flags;
30332 + PFM_DBG("state=%d check_mask=0x%x", state, check_mask);
30334 + * if the context is detached, then we do not touch
30335 + * hardware, therefore there is no restriction on when we can
30338 + if (state == PFM_CTX_UNLOADED)
30341 + * no command can operate on a zombie context.
30342 + * A context becomes zombie when the file that identifies
30343 + * it is closed while the context is still attached to the
30344 + * thread it monitors.
30346 + if (state == PFM_CTX_ZOMBIE)
30350 + * at this point, state is PFM_CTX_LOADED or PFM_CTX_MASKED
30354 + * some commands require the context to be unloaded to operate
30356 + if (check_mask & PFM_CMD_UNLOADED) {
30357 + PFM_DBG("state=%d, cmd needs context unloaded", state);
30362 + * self-monitoring always ok.
30364 + if (task == current)
30368 + * for syswide, the calling thread must be running on the cpu
30369 + * the context is bound to.
30371 + if (ctx->flags.system) {
30372 + if (ctx->cpu != smp_processor_id())
30378 + * at this point, monitoring another thread
30382 + * the pfm_unload_context() command is allowed on masked context
30384 + if (state == PFM_CTX_MASKED && !(check_mask & PFM_CMD_UNLOAD))
30388 + * When we operate on another thread, we must wait for it to be
30389 + * stopped and completely off any CPU as we need to access the
30390 + * PMU state (or machine state).
30392 + * A thread can be put in the STOPPED state in various ways
30393 + * including PTRACE_ATTACH, or when it receives a SIGSTOP signal.
30394 + * We enforce that the thread must be ptraced, so it is stopped
30395 + * AND it CANNOT wake up while we operate on it because this
30396 + * would require an action from the ptracing parent which is the
30397 + * thread that is calling this function.
30399 + * The dependency on ptrace imposes that only the ptracing
30400 + * parent can issue commands on a thread. This is unfortunate
30401 + * but we do not know of a better way of doing this.
30403 + if (check_mask & PFM_CMD_STOPPED) {
30405 + spin_unlock_irqrestore(&ctx->lock, local_flags);
30408 + * check that the thread is ptraced AND STOPPED
30410 + ret = pfm_wait_task_stopped(task, resume);
30412 + spin_lock_irqsave(&ctx->lock, new_flags);
30415 + * flags may be different than when we released the lock
30417 + *flags = new_flags;
30422 + * we must recheck to verify if state has changed
30424 + if (unlikely(ctx->state != state)) {
30425 + PFM_DBG("old_state=%d new_state=%d",
30435 + * pfm_get_args - Function used to copy the syscall argument into kernel memory.
30436 + * @ureq: user argument
30437 + * @sz: user argument size
30438 + * @lsz: size of stack buffer
30439 + * @laddr: stack buffer address
30440 + * @req: point to start of kernel copy of the argument
30441 + * @ptr_free: address of kernel copy to free
30443 + * There are two options:
30444 + * - use a stack buffer described by laddr (addresses) and lsz (size)
30445 + * - allocate memory
30448 + * < 0 : in case of error (ptr_free may not be updated)
30450 + * - req: points to base of kernel copy of arguments
30451 + * - ptr_free: address of buffer to free by caller on exit.
30452 + * NULL if using the stack buffer
30454 + * when ptr_free is not NULL upon return, the caller must kfree()
30456 +int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
30457 + void **req, void **ptr_free)
30462 + * check sysadmin argument limit
30464 + if (unlikely(sz > pfm_controls.arg_mem_max)) {
30465 + PFM_DBG("argument too big %zu max=%zu",
30467 + pfm_controls.arg_mem_max);
30472 + * check if vector fits on stack buffer
30475 + addr = kmalloc(sz, GFP_KERNEL);
30476 + if (unlikely(addr == NULL))
30478 + *ptr_free = addr;
30482 + *ptr_free = NULL;
30486 + * bring the data in
30488 + if (unlikely(copy_from_user(addr, ureq, sz))) {
30489 + if (addr != laddr)
30495 + * base address of kernel buffer
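
Callers of pfm_get_args() pass a small on-stack buffer sized for the common case; only larger vectors fall back to kmalloc(), in which case ptr_free carries the address to release. The write/read PMC/PMD system calls below all follow this pattern; an abbreviated, illustrative caller is sketched here (the function name and the processing step are placeholders).

/* abbreviated caller-side use of pfm_get_args(); illustration only */
static long example_copy_pmcs(struct pfarg_pmc __user *ureq, int count)
{
	struct pfarg_pmc pmcs[PFM_PMC_STK_ARG];	/* on-stack fast path */
	struct pfarg_pmc *req;
	void *fptr;
	size_t sz = count * sizeof(*ureq);
	long ret;

	ret = pfm_get_args(ureq, sz, sizeof(pmcs), pmcs, (void **)&req, &fptr);
	if (ret)
		return ret;

	/* ... operate on req[0..count-1] ... */

	kfree(fptr);	/* NULL when the stack buffer was used; kfree(NULL) is a no-op */
	return 0;
}
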
30503 + * pfm_acquire_ctx_from_fd -- get ctx from file descriptor
30504 + * @fd: file descriptor
30505 + * @ctx: pointer to pointer of context updated on return
30506 + * @cookie: opaque structure to use for release
30508 + * This helper function extracts the ctx from the file descriptor.
30509 + * It also increments the refcount of the file structure. Thus
30510 + * it updates the cookie so the refcount can be decreased when
30511 + * leaving the perfmon syscall via pfm_release_ctx_from_fd
30513 +static int pfm_acquire_ctx_from_fd(int fd, struct pfm_context **ctx,
30514 + struct pfm_syscall_cookie *cookie)
30516 + struct file *filp;
30519 + filp = fget_light(fd, &fput_needed);
30520 + if (unlikely(filp == NULL)) {
30521 + PFM_DBG("invalid fd %d", fd);
30525 + *ctx = filp->private_data;
30527 + if (unlikely(!*ctx || filp->f_op != &pfm_file_ops)) {
30528 + PFM_DBG("fd %d not related to perfmon", fd);
30531 + cookie->filp = filp;
30532 + cookie->fput_needed = fput_needed;
30538 + * pfm_release_ctx_from_fd -- decrease refcount of file associated with context
30539 + * @cookie: the cookie structure initialized by pfm_acquire_ctx_from_fd
30541 +static inline void pfm_release_ctx_from_fd(struct pfm_syscall_cookie *cookie)
30543 + fput_light(cookie->filp, cookie->fput_needed);
30547 + * unlike the other perfmon system calls, this one returns a file descriptor
30548 + * or a value < 0 in case of error, very much like open() or socket()
30550 +asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq,
30551 + char __user *fmt_name,
30552 + void __user *fmt_uarg, size_t fmt_size)
30554 + struct pfarg_ctx req;
30555 + struct pfm_smpl_fmt *fmt = NULL;
30556 + void *fmt_arg = NULL;
30559 + PFM_DBG("req=%p fmt=%p fmt_arg=%p size=%zu",
30560 + ureq, fmt_name, fmt_uarg, fmt_size);
30562 + if (perfmon_disabled)
30565 + if (copy_from_user(&req, ureq, sizeof(req)))
30569 + ret = pfm_get_smpl_arg(fmt_name, fmt_uarg, fmt_size, &fmt_arg, &fmt);
30574 + ret = __pfm_create_context(&req, fmt, fmt_arg, PFM_NORMAL, NULL);
30581 +asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, int count)
30583 + struct pfm_context *ctx;
30584 + struct task_struct *task;
30585 + struct pfm_syscall_cookie cookie;
30586 + struct pfarg_pmc pmcs[PFM_PMC_STK_ARG];
30587 + struct pfarg_pmc *req;
30588 + void *fptr, *resume;
30589 + unsigned long flags;
30593 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30595 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
30596 + PFM_DBG("invalid arg count %d", count);
30600 + sz = count*sizeof(*ureq);
30602 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30606 + ret = pfm_get_args(ureq, sz, sizeof(pmcs), pmcs, (void **)&req, &fptr);
30610 + spin_lock_irqsave(&ctx->lock, flags);
30612 + task = ctx->task;
30614 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30616 + ret = __pfm_write_pmcs(ctx, req, count);
30618 + spin_unlock_irqrestore(&ctx->lock, flags);
30621 + pfm_resume_task(task, resume);
30624 + * This function may be on the critical path.
30625 + * We want to avoid the branch if unnecessary.
30630 + pfm_release_ctx_from_fd(&cookie);
30634 +asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, int count)
30636 + struct pfm_context *ctx;
30637 + struct task_struct *task;
30638 + struct pfm_syscall_cookie cookie;
30639 + struct pfarg_pmd pmds[PFM_PMD_STK_ARG];
30640 + struct pfarg_pmd *req;
30641 + void *fptr, *resume;
30642 + unsigned long flags;
30646 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30648 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
30649 + PFM_DBG("invalid arg count %d", count);
30653 + sz = count*sizeof(*ureq);
30655 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30659 + ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr);
30663 + spin_lock_irqsave(&ctx->lock, flags);
30665 + task = ctx->task;
30667 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30669 + ret = __pfm_write_pmds(ctx, req, count, 0);
30671 + spin_unlock_irqrestore(&ctx->lock, flags);
30674 + pfm_resume_task(task, resume);
30679 + pfm_release_ctx_from_fd(&cookie);
30683 +asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, int count)
30685 + struct pfm_context *ctx;
30686 + struct task_struct *task;
30687 + struct pfm_syscall_cookie cookie;
30688 + struct pfarg_pmd pmds[PFM_PMD_STK_ARG];
30689 + struct pfarg_pmd *req;
30690 + void *fptr, *resume;
30691 + unsigned long flags;
30695 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30697 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
30700 + sz = count*sizeof(*ureq);
30702 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30706 + ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr);
30710 + spin_lock_irqsave(&ctx->lock, flags);
30712 + task = ctx->task;
30714 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30716 + ret = __pfm_read_pmds(ctx, req, count);
30718 + spin_unlock_irqrestore(&ctx->lock, flags);
30720 + if (copy_to_user(ureq, req, sz))
30724 + pfm_resume_task(task, resume);
30729 + pfm_release_ctx_from_fd(&cookie);
30733 +asmlinkage long sys_pfm_restart(int fd)
30735 + struct pfm_context *ctx;
30736 + struct task_struct *task;
30737 + struct pfm_syscall_cookie cookie;
30739 + unsigned long flags;
30742 + PFM_DBG("fd=%d", fd);
30744 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30748 + spin_lock_irqsave(&ctx->lock, flags);
30750 + task = ctx->task;
30752 + ret = pfm_check_task_state(ctx, 0, &flags, &resume);
30754 + ret = __pfm_restart(ctx, &info);
30756 + spin_unlock_irqrestore(&ctx->lock, flags);
30759 + pfm_resume_task(task, resume);
30761 + * In per-thread mode with blocking notification, i.e.
30762 + * ctx->flags.block=1, we need to defer issuing the
30763 + * complete to unblock the blocked monitored thread.
30764 + * Otherwise we have a potential deadlock due to a lock
30765 + * inversion between the context lock and the task_rq_lock()
30766 + * which can happen if one thread is in this call and the other
30767 + * (the monitored thread) is in the context switch code.
30769 + * It is safe to access the context outside the critical section
30771 + * - we are protected by the fget_light(), thus the context
30772 + * cannot disappear
30774 + if (ret == 0 && info == 1)
30775 + complete(&ctx->restart_complete);
30777 + pfm_release_ctx_from_fd(&cookie);
30781 +asmlinkage long sys_pfm_stop(int fd)
30783 + struct pfm_context *ctx;
30784 + struct task_struct *task;
30785 + struct pfm_syscall_cookie cookie;
30787 + unsigned long flags;
30789 + int release_info;
30791 + PFM_DBG("fd=%d", fd);
30793 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30797 + spin_lock_irqsave(&ctx->lock, flags);
30799 + task = ctx->task;
30801 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30803 + ret = __pfm_stop(ctx, &release_info);
30805 + spin_unlock_irqrestore(&ctx->lock, flags);
30808 + pfm_resume_task(task, resume);
30811 + * defer cancellation of timer to avoid race
30812 + * with pfm_handle_switch_timeout()
30814 + * applies only when self-monitoring
30816 + if (release_info & 0x2)
30817 + hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
30819 + pfm_release_ctx_from_fd(&cookie);
30823 +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq)
30825 + struct pfm_context *ctx;
30826 + struct task_struct *task;
30827 + struct pfm_syscall_cookie cookie;
30829 + struct pfarg_start req;
30830 + unsigned long flags;
30833 + PFM_DBG("fd=%d req=%p", fd, ureq);
30835 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30840 + * the one argument is actually optional
30842 + if (ureq && copy_from_user(&req, ureq, sizeof(req)))
30845 + spin_lock_irqsave(&ctx->lock, flags);
30847 + task = ctx->task;
30849 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30851 + ret = __pfm_start(ctx, ureq ? &req : NULL);
30853 + spin_unlock_irqrestore(&ctx->lock, flags);
30856 + pfm_resume_task(task, resume);
30858 + pfm_release_ctx_from_fd(&cookie);
30862 +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq)
30864 + struct pfm_context *ctx;
30865 + struct task_struct *task;
30866 + struct pfm_syscall_cookie cookie;
30867 + void *resume, *dummy_resume;
30868 + unsigned long flags;
30869 + struct pfarg_load req;
30872 + PFM_DBG("fd=%d req=%p", fd, ureq);
30874 + if (copy_from_user(&req, ureq, sizeof(req)))
30877 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30884 + * in per-thread mode (not self-monitoring), get a reference
30885 + * on task to monitor. This must be done with interrupts enabled.
30886 + * Upon successful return, refcount on task is increased.
30888 + * fget_light() is protecting the context.
30890 + if (!ctx->flags.system && req.load_pid != current->pid) {
30891 + ret = pfm_get_task(ctx, req.load_pid, &task, &resume);
30897 + * irqsave is required to avoid a race in case the context is already
30898 + * loaded, or with the switch timeout in the case of self-monitoring
30900 + spin_lock_irqsave(&ctx->lock, flags);
30902 + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &dummy_resume);
30904 + ret = __pfm_load_context(ctx, &req, task);
30906 + spin_unlock_irqrestore(&ctx->lock, flags);
30909 + pfm_resume_task(task, resume);
30912 + * in per-thread mode (not self-monitoring), we need
30913 + * to decrease refcount on task to monitor:
30914 + * - load successful: we have a reference to the task in ctx->task
30915 + * - load failed : undo the effect of pfm_get_task()
30917 + if (task != current)
30918 + put_task_struct(task);
30920 + pfm_release_ctx_from_fd(&cookie);
30924 +asmlinkage long sys_pfm_unload_context(int fd)
30926 + struct pfm_context *ctx;
30927 + struct task_struct *task;
30928 + struct pfm_syscall_cookie cookie;
30930 + unsigned long flags;
30932 + int is_system, release_info = 0;
30935 + PFM_DBG("fd=%d", fd);
30937 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30941 + is_system = ctx->flags.system;
30943 + spin_lock_irqsave(&ctx->lock, flags);
30946 + task = ctx->task;
30948 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED|PFM_CMD_UNLOAD,
30949 + &flags, &resume);
30951 + ret = __pfm_unload_context(ctx, &release_info);
30953 + spin_unlock_irqrestore(&ctx->lock, flags);
30956 + pfm_resume_task(task, resume);
30959 + * cancel timer now that context is unlocked
30960 + * avoid race with pfm_handle_switch_timeout()
30962 + if (release_info & 0x2) {
30964 + r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
30965 + PFM_DBG("timeout cancel=%d", r);
30968 + if (release_info & 0x1)
30969 + pfm_session_release(is_system, cpu);
30971 + pfm_release_ctx_from_fd(&cookie);
30975 +asmlinkage long sys_pfm_create_evtsets(int fd, struct pfarg_setdesc __user *ureq, int count)
30977 + struct pfm_context *ctx;
30978 + struct pfm_syscall_cookie cookie;
30979 + struct pfarg_setdesc *req;
30980 + void *fptr, *resume;
30981 + unsigned long flags;
30985 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30987 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
30990 + sz = count*sizeof(*ureq);
30992 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30996 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
31001 + * must mask interrupts because we do not know the state of the context:
31002 + * it could be attached, in which case we could be getting PMU interrupts. So
31003 + * we mask interrupts, lock the context, check, and possibly relax the masking
31005 + spin_lock_irqsave(&ctx->lock, flags);
31007 + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume);
31009 + ret = __pfm_create_evtsets(ctx, req, count);
31011 + spin_unlock_irqrestore(&ctx->lock, flags);
31013 + * context must be unloaded for this command. The resume pointer
31014 + * is necessarily NULL, thus no need to call pfm_resume_task()
31019 + pfm_release_ctx_from_fd(&cookie);
31023 +asmlinkage long sys_pfm_getinfo_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count)
31025 + struct pfm_context *ctx;
31026 + struct task_struct *task;
31027 + struct pfm_syscall_cookie cookie;
31028 + struct pfarg_setinfo *req;
31029 + void *fptr, *resume;
31030 + unsigned long flags;
31034 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
31036 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
31039 + sz = count*sizeof(*ureq);
31041 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
31045 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
31050 + * this command operates even when context is loaded, so we need
31051 + * to keep interrupts masked to avoid a race with PMU interrupt
31052 + * which may switch the active set
31054 + spin_lock_irqsave(&ctx->lock, flags);
31056 + task = ctx->task;
31058 + ret = pfm_check_task_state(ctx, 0, &flags, &resume);
31060 + ret = __pfm_getinfo_evtsets(ctx, req, count);
31062 + spin_unlock_irqrestore(&ctx->lock, flags);
31065 + pfm_resume_task(task, resume);
31067 + if (copy_to_user(ureq, req, sz))
31072 + pfm_release_ctx_from_fd(&cookie);
31076 +asmlinkage long sys_pfm_delete_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count)
31078 + struct pfm_context *ctx;
31079 + struct pfm_syscall_cookie cookie;
31080 + struct pfarg_setinfo *req;
31081 + void *fptr, *resume;
31082 + unsigned long flags;
31086 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
31088 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
31091 + sz = count*sizeof(*ureq);
31093 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
31097 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
31102 + * must mask interrupts because we do not know the state of the context:
31103 + * it could be attached, in which case we could be getting PMU interrupts
31105 + spin_lock_irqsave(&ctx->lock, flags);
31107 + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume);
31109 + ret = __pfm_delete_evtsets(ctx, req, count);
31111 + spin_unlock_irqrestore(&ctx->lock, flags);
31113 + * context must be unloaded for this command. The resume pointer
31114 + * is necessarily NULL, thus no need to call pfm_resume_task()
31119 + pfm_release_ctx_from_fd(&cookie);
31122 diff --git a/perfmon/perfmon_sysfs.c b/perfmon/perfmon_sysfs.c
31123 new file mode 100644
31124 index 0000000..7353c3b
31126 +++ b/perfmon/perfmon_sysfs.c
31129 + * perfmon_sysfs.c: perfmon2 sysfs interface
31131 + * This file implements the perfmon2 interface which
31132 + * provides access to the hardware performance counters
31133 + * of the host processor.
31135 + * The initial version of perfmon.c was written by
31136 + * Ganesh Venkitachalam, IBM Corp.
31138 + * Then it was modified for perfmon-1.x by Stephane Eranian and
31139 + * David Mosberger, Hewlett Packard Co.
31141 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
31142 + * by Stephane Eranian, Hewlett Packard Co.
31144 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
31145 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
31146 + * David Mosberger-Tang <davidm@hpl.hp.com>
31148 + * More information about perfmon available at:
31149 + * http://perfmon2.sf.net
31151 + * This program is free software; you can redistribute it and/or
31152 + * modify it under the terms of version 2 of the GNU General Public
31153 + * License as published by the Free Software Foundation.
31155 + * This program is distributed in the hope that it will be useful,
31156 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
31157 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31158 + * General Public License for more details.
31160 + * You should have received a copy of the GNU General Public License
31161 + * along with this program; if not, write to the Free Software
31162 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31165 +#include <linux/kernel.h>
31166 +#include <linux/module.h> /* for EXPORT_SYMBOL */
31167 +#include <linux/perfmon_kern.h>
31168 +#include "perfmon_priv.h"
31170 +struct pfm_attribute {
31171 + struct attribute attr;
31172 + ssize_t (*show)(void *, struct pfm_attribute *attr, char *);
31173 + ssize_t (*store)(void *, const char *, size_t);
31175 +#define to_attr(n) container_of(n, struct pfm_attribute, attr)
31177 +#define PFM_RO_ATTR(_name, _show) \
31178 + struct kobj_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
31180 +#define PFM_RW_ATTR(_name, _show, _store) \
31181 + struct kobj_attribute attr_##_name = __ATTR(_name, 0644, _show, _store)
31183 +#define PFM_ROS_ATTR(_name, _show) \
31184 + struct pfm_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
31186 +#define is_attr_name(a, n) (!strcmp((a)->attr.name, n))
31187 +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
31189 +static struct kobject *pfm_kernel_kobj, *pfm_fmt_kobj;
31190 +static struct kobject *pfm_pmu_kobj;
31192 +static ssize_t pfm_regs_attr_show(struct kobject *kobj,
31193 + struct attribute *attr, char *buf)
31195 + struct pfm_regmap_desc *reg = to_reg(kobj);
31196 + struct pfm_attribute *attribute = to_attr(attr);
31197 + return attribute->show ? attribute->show(reg, attribute, buf) : -EIO;
31200 +static ssize_t pfm_fmt_attr_show(struct kobject *kobj,
31201 + struct attribute *attr, char *buf)
31203 + struct pfm_smpl_fmt *fmt = to_smpl_fmt(kobj);
31204 + struct pfm_attribute *attribute = to_attr(attr);
31205 + return attribute->show ? attribute->show(fmt, attribute, buf) : -EIO;
31208 +static struct sysfs_ops pfm_regs_sysfs_ops = {
31209 + .show = pfm_regs_attr_show
31212 +static struct sysfs_ops pfm_fmt_sysfs_ops = {
31213 + .show = pfm_fmt_attr_show
31216 +static struct kobj_type pfm_regs_ktype = {
31217 + .sysfs_ops = &pfm_regs_sysfs_ops,
31220 +static struct kobj_type pfm_fmt_ktype = {
31221 + .sysfs_ops = &pfm_fmt_sysfs_ops,
31224 +static ssize_t pfm_controls_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
31228 + if (is_attr_name(attr, "version"))
31229 + return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN);
31231 + if (is_attr_name(attr, "task_sessions_count"))
31232 + return pfm_sysfs_res_show(buf, PAGE_SIZE, 0);
31234 + if (is_attr_name(attr, "debug"))
31235 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug);
31237 + if (is_attr_name(attr, "task_group"))
31238 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group);
31240 + if (is_attr_name(attr, "mode"))
31241 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.flags);
31243 + if (is_attr_name(attr, "arg_mem_max"))
31244 + return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_mem_max);
31246 + if (is_attr_name(attr, "syscall")) {
31247 + base = pfm_arch_get_base_syscall();
31248 + return snprintf(buf, PAGE_SIZE, "%d\n", base);
31251 + if (is_attr_name(attr, "sys_sessions_count"))
31252 + return pfm_sysfs_res_show(buf, PAGE_SIZE, 1);
31254 + if (is_attr_name(attr, "smpl_buffer_mem_max"))
31255 + return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buffer_mem_max);
31257 + if (is_attr_name(attr, "smpl_buffer_mem_cur"))
31258 + return pfm_sysfs_res_show(buf, PAGE_SIZE, 2);
31260 + if (is_attr_name(attr, "sys_group"))
31261 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group);
31263 + /* XXX: could be set to write-only */
31264 + if (is_attr_name(attr, "reset_stats")) {
31267 + return strnlen(buf, PAGE_SIZE);
31272 +static ssize_t pfm_controls_store(struct kobject *kobj, struct kobj_attribute *attr,
31273 + const char *buf, size_t count)
31278 + if (sscanf(buf, "%zu", &d) != 1)
31281 + if (is_attr_name(attr, "debug"))
31282 + pfm_controls.debug = d;
31284 + if (is_attr_name(attr, "task_group"))
31285 + pfm_controls.task_group = d;
31287 + if (is_attr_name(attr, "sys_group"))
31288 + pfm_controls.sys_group = d;
31290 + if (is_attr_name(attr, "mode"))
31291 + pfm_controls.flags = d ? PFM_CTRL_FL_RW_EXPERT : 0;
31293 + if (is_attr_name(attr, "arg_mem_max")) {
31295 + * we impose a page as the minimum.
31297 + * This limit may be smaller than the stack buffer
31298 + * available and that is fine.
31300 + if (d >= PAGE_SIZE)
31301 + pfm_controls.arg_mem_max = d;
31303 + if (is_attr_name(attr, "reset_stats")) {
31304 + for_each_online_cpu(i) {
31305 + pfm_reset_stats(i);
31309 + if (is_attr_name(attr, "smpl_buffer_mem_max")) {
31310 + if (d >= PAGE_SIZE)
31311 + pfm_controls.smpl_buffer_mem_max = d;
31318 + * /sys/kernel/perfmon attributes
31320 +static PFM_RO_ATTR(version, pfm_controls_show);
31321 +static PFM_RO_ATTR(task_sessions_count, pfm_controls_show);
31322 +static PFM_RO_ATTR(syscall, pfm_controls_show);
31323 +static PFM_RO_ATTR(sys_sessions_count, pfm_controls_show);
31324 +static PFM_RO_ATTR(smpl_buffer_mem_cur, pfm_controls_show);
31326 +static PFM_RW_ATTR(debug, pfm_controls_show, pfm_controls_store);
31327 +static PFM_RW_ATTR(task_group, pfm_controls_show, pfm_controls_store);
31328 +static PFM_RW_ATTR(mode, pfm_controls_show, pfm_controls_store);
31329 +static PFM_RW_ATTR(sys_group, pfm_controls_show, pfm_controls_store);
31330 +static PFM_RW_ATTR(arg_mem_max, pfm_controls_show, pfm_controls_store);
31331 +static PFM_RW_ATTR(smpl_buffer_mem_max, pfm_controls_show, pfm_controls_store);
31332 +static PFM_RW_ATTR(reset_stats, pfm_controls_show, pfm_controls_store);
31334 +static struct attribute *pfm_kernel_attrs[] = {
31335 + &attr_version.attr,
31336 + &attr_syscall.attr,
31337 + &attr_task_sessions_count.attr,
31338 + &attr_sys_sessions_count.attr,
31339 + &attr_smpl_buffer_mem_cur.attr,
31340 + &attr_debug.attr,
31341 + &attr_reset_stats.attr,
31342 + &attr_sys_group.attr,
31343 + &attr_task_group.attr,
31345 + &attr_smpl_buffer_mem_max.attr,
31346 + &attr_arg_mem_max.attr,
31350 +static struct attribute_group pfm_kernel_attr_group = {
31351 + .attrs = pfm_kernel_attrs,
31355 + * per-reg attributes
31357 +static ssize_t pfm_reg_show(void *data, struct pfm_attribute *attr, char *buf)
31359 + struct pfm_regmap_desc *reg;
31364 + if (is_attr_name(attr, "name"))
31365 + return snprintf(buf, PAGE_SIZE, "%s\n", reg->desc);
31367 + if (is_attr_name(attr, "dfl_val"))
31368 + return snprintf(buf, PAGE_SIZE, "0x%llx\n",
31369 + (unsigned long long)reg->dfl_val);
31371 + if (is_attr_name(attr, "width")) {
31372 + w = (reg->type & PFM_REG_C64) ?
31373 + pfm_pmu_conf->counter_width : 64;
31374 + return snprintf(buf, PAGE_SIZE, "%d\n", w);
31377 + if (is_attr_name(attr, "rsvd_msk"))
31378 + return snprintf(buf, PAGE_SIZE, "0x%llx\n",
31379 + (unsigned long long)reg->rsvd_msk);
31381 + if (is_attr_name(attr, "addr"))
31382 + return snprintf(buf, PAGE_SIZE, "0x%lx\n", reg->hw_addr);
31387 +static PFM_ROS_ATTR(name, pfm_reg_show);
31388 +static PFM_ROS_ATTR(dfl_val, pfm_reg_show);
31389 +static PFM_ROS_ATTR(rsvd_msk, pfm_reg_show);
31390 +static PFM_ROS_ATTR(width, pfm_reg_show);
31391 +static PFM_ROS_ATTR(addr, pfm_reg_show);
31393 +static struct attribute *pfm_reg_attrs[] = {
31395 + &attr_dfl_val.attr,
31396 + &attr_rsvd_msk.attr,
31397 + &attr_width.attr,
31402 +static struct attribute_group pfm_reg_attr_group = {
31403 + .attrs = pfm_reg_attrs,
31406 +static ssize_t pfm_pmu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
31408 + if (is_attr_name(attr, "model"))
31409 + return snprintf(buf, PAGE_SIZE, "%s\n", pfm_pmu_conf->pmu_name);
31412 +static PFM_RO_ATTR(model, pfm_pmu_show);
31414 +static struct attribute *pfm_pmu_desc_attrs[] = {
31415 + &attr_model.attr,
31419 +static struct attribute_group pfm_pmu_desc_attr_group = {
31420 + .attrs = pfm_pmu_desc_attrs,
31423 +static int pfm_sysfs_add_pmu_regs(struct pfm_pmu_config *pmu)
31425 + struct pfm_regmap_desc *reg;
31426 + unsigned int i, k;
31429 + reg = pmu->pmc_desc;
31430 + for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
31432 + if (!(reg->type & PFM_REG_I))
31435 + ret = kobject_init_and_add(®->kobj, &pfm_regs_ktype,
31436 + pfm_pmu_kobj, "pmc%u", i);
31440 + ret = sysfs_create_group(®->kobj, &pfm_reg_attr_group);
31442 + kobject_del(®->kobj);
31447 + reg = pmu->pmd_desc;
31448 + for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
31450 + if (!(reg->type & PFM_REG_I))
31453 + ret = kobject_init_and_add(®->kobj, &pfm_regs_ktype,
31454 + pfm_pmu_kobj, "pmd%u", i);
31458 + ret = sysfs_create_group(®->kobj, &pfm_reg_attr_group);
31460 + kobject_del(®->kobj);
31466 + reg = pmu->pmd_desc;
31467 + for (k = 0; k < i; k++, reg++) {
31468 + if (!(reg->type & PFM_REG_I))
31470 + sysfs_remove_group(®->kobj, &pfm_reg_attr_group);
31471 + kobject_del(®->kobj);
31473 + i = pmu->num_pmc_entries;
31474 + /* fall through */
31476 + reg = pmu->pmc_desc;
31477 + for (k = 0; k < i; k++, reg++) {
31478 + if (!(reg->type & PFM_REG_I))
31480 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31481 + kobject_del(&reg->kobj);
31486 +static int pfm_sysfs_del_pmu_regs(struct pfm_pmu_config *pmu)
31488 + struct pfm_regmap_desc *reg;
31491 + reg = pmu->pmc_desc;
31492 + for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
31494 + if (!(reg->type & PFM_REG_I))
31497 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31498 + kobject_del(&reg->kobj);
31501 + reg = pmu->pmd_desc;
31502 + for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
31504 + if (!(reg->type & PFM_REG_I))
31507 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31508 + kobject_del(&reg->kobj);
31514 + * when a PMU description module is inserted, we create
31515 + * a pmu_desc subdir in sysfs and we populate it with
31516 + * PMU specific information, such as register mappings
31518 +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu)
31522 + pfm_pmu_kobj = kobject_create_and_add("pmu_desc", pfm_kernel_kobj);
31523 + if (!pfm_pmu_kobj)
31526 + ret = sysfs_create_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
31528 + /* will release pfm_pmu_kobj */
31529 + kobject_put(pfm_pmu_kobj);
31533 + ret = pfm_sysfs_add_pmu_regs(pmu);
31535 + sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
31536 + /* will release pfm_pmu_kobj */
31537 + kobject_put(pfm_pmu_kobj);
31539 + kobject_uevent(pfm_pmu_kobj, KOBJ_ADD);
31545 + * when a PMU description module is removed, we also remove
31546 + * all its information from sysfs, i.e., the pmu_desc subdir
31549 +int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu)
31551 + pfm_sysfs_del_pmu_regs(pmu);
31552 + sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
31553 + kobject_uevent(pfm_pmu_kobj, KOBJ_REMOVE);
31554 + kobject_put(pfm_pmu_kobj);
31555 + pfm_pmu_kobj = NULL;
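+/*
+ * With a PMU description module loaded, the resulting tree looks roughly
+ * like this (register names and indices depend on the PMU module):
+ *
+ *   /sys/kernel/perfmon/pmu_desc/model
+ *   /sys/kernel/perfmon/pmu_desc/pmc0/{name,dfl_val,rsvd_msk,width,addr}
+ *   /sys/kernel/perfmon/pmu_desc/pmd0/{name,dfl_val,rsvd_msk,width,addr}
+ *
+ * and pfm_sysfs_remove_pmu() tears it down again when the module goes away.
+ */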
31559 +static ssize_t pfm_fmt_show(void *data, struct pfm_attribute *attr, char *buf)
31561 + struct pfm_smpl_fmt *fmt = data;
31563 + if (is_attr_name(attr, "version"))
31564 + return snprintf(buf, PAGE_SIZE, "%u.%u\n",
31565 + fmt->fmt_version >> 16 & 0xffff,
31566 + fmt->fmt_version & 0xffff);
31571 + * do not use predefined macros because of name conflict
31572 + * with /sys/kernel/perfmon/version
31574 +struct pfm_attribute attr_fmt_version = {
31575 + .attr = { .name = "version", .mode = 0444 },
31576 + .show = pfm_fmt_show,
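+/*
+ * (A PFM_ROS_ATTR(version, pfm_fmt_show) would presumably emit a symbol
+ * named attr_version, colliding with the attr_version used for
+ * /sys/kernel/perfmon/version above, hence the explicit attr_fmt_version.)
+ */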
31579 +static struct attribute *pfm_fmt_attrs[] = {
31580 + &attr_fmt_version.attr,
31584 +static struct attribute_group pfm_fmt_attr_group = {
31585 + .attrs = pfm_fmt_attrs,
31589 + * when a sampling format module is inserted, we populate
31590 + * sysfs with some information
31592 +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt)
31596 + ret = kobject_init_and_add(&fmt->kobj, &pfm_fmt_ktype,
31597 + pfm_fmt_kobj, fmt->fmt_name);
31601 + ret = sysfs_create_group(&fmt->kobj, &pfm_fmt_attr_group);
31603 + kobject_del(&fmt->kobj);
31605 + kobject_uevent(&fmt->kobj, KOBJ_ADD);
31611 + * when a sampling format module is removed, its information
31612 + * must also be removed from sysfs
31614 +void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
31616 + sysfs_remove_group(&fmt->kobj, &pfm_fmt_attr_group);
31617 + kobject_uevent(&fmt->kobj, KOBJ_REMOVE);
31618 + kobject_del(&fmt->kobj);
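+/*
+ * Each registered sampling format therefore shows up as
+ * /sys/kernel/perfmon/formats/<fmt_name>/version, where <fmt_name> comes
+ * from fmt->fmt_name and the file reports the format's major.minor
+ * revision (e.g. "1.0").
+ */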
31621 +int __init pfm_init_sysfs(void)
31625 + pfm_kernel_kobj = kobject_create_and_add("perfmon", kernel_kobj);
31626 + if (!pfm_kernel_kobj) {
31627 + PFM_ERR("cannot add kernel object: /sys/kernel/perfmon");
31631 + ret = sysfs_create_group(pfm_kernel_kobj, &pfm_kernel_attr_group);
31633 + kobject_put(pfm_kernel_kobj);
31637 + pfm_fmt_kobj = kobject_create_and_add("formats", pfm_kernel_kobj);
31639 + PFM_ERR("cannot add fmt object: %d", ret);
31642 + if (pfm_pmu_conf)
31643 + pfm_sysfs_add_pmu(pfm_pmu_conf);
31645 + pfm_sysfs_builtin_fmt_add();
31650 + kobject_del(pfm_kernel_kobj);