[ar71xx] create firmware images for the Planex MZK-W04NU board
[openwrt.git] / target / linux / ps3 / patches-2.6.27 / 001-perfmon-2.6.27.patch
1 diff --git a/Documentation/ABI/testing/sysfs-perfmon b/Documentation/ABI/testing/sysfs-perfmon
2 new file mode 100644
3 index 0000000..bde434c
4 --- /dev/null
5 +++ b/Documentation/ABI/testing/sysfs-perfmon
6 @@ -0,0 +1,87 @@
7 +What: /sys/kernel/perfmon
8 +Date: Nov 2007
9 +KernelVersion: 2.6.24
10 +Contact: eranian@gmail.com
11 +
12 +Description: provide the configuration interface for the perfmon2 subsystems.
13 + The tree contains information about the detected hardware, current
14 + state of the subsystem as well as some configuration parameters.
15 +
16 + The tree consists of the following entries:
17 +
18 + /sys/kernel/perfmon/debug (read-write):
19 +
20 + Enable perfmon2 debugging output via klogd. Debug messages produced during
21 + PMU interrupt handling are not controlled by this entry. The traces are rate-limited
22 + to avoid flooding of the console. It is possible to change the throttling
23 + via /proc/sys/kernel/printk_ratelimit. The value is interpreted as a bitmask.
24 + Each bit enables a particular type of debug messages. Refer to the file
25 + include/linux/perfmon_kern.h for more information.
26 +
27 + /sys/kernel/perfmon/pmc_max_fast_arg (read-only):
28 +
29 + Number of perfmon2 syscall arguments copied directly onto the
30 + stack (copy_from_user) for pfm_write_pmcs(). Copying to the stack avoids
31 + having to allocate a buffer. The unit is the number of pfarg_pmc_t
32 + structures.
33 +
34 + /sys/kernel/perfmon/pmd_max_fast_arg (read-only):
35 +
36 + Number of perfmon2 syscall arguments copied directly onto the
37 + stack (copy_from_user) for pfm_write_pmds()/pfm_read_pmds(). Copying
38 + to the stack avoids having to allocate a buffer. The unit is the number
39 + of pfarg_pmd_t structures.
40 +
41 +
42 + /sys/kernel/perfmon/reset_stats (write-only):
43 +
44 + Reset the statistics collected by perfmon2. Stats are available
45 + per-cpu via debugfs.
46 +
47 + /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only):
48 +
49 + Reports the amount of memory currently dedicated to sampling
50 + buffers by the kernel. The unit is byte.
51 +
52 + /sys/kernel/perfmon/smpl_buffer_mem_max (read-write):
53 +
54 + Maximum amount of kernel memory usable for sampling buffers. -1 means
55 + everything that is available. Unit is byte.
56 +
57 + /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only):
58 +
59 + Current utilization of kernel memory in bytes.
60 +
61 + /sys/kernel/perfmon/sys_group (read-write):
62 +
63 + Users group allowed to create a system-wide perfmon2 context (session).
64 + -1 means any group. This control will be kept until we find a package
65 + able to control capabilities via PAM.
66 +
67 + /sys/kernel/perfmon/task_group (read-write):
68 +
69 + Users group allowed to create a per-thread context (session).
70 + -1 means any group. This control will be kept until we find a
71 + package able to control capabilities via PAM.
72 +
73 + /sys/kernel/perfmon/sys_sessions_count (read-only):
74 +
75 + Number of system-wide contexts currently attached to CPUs.
76 +
77 + /sys/kernel/perfmon/task_sessions_count (read-only):
78 +
79 + Number of per-thread contexts currently attached to threads.
80 +
81 + /sys/kernel/perfmon/version (read-only):
82 +
83 + Perfmon2 interface revision number.
84 +
85 + /sys/kernel/perfmon/arg_mem_max (read-write):
86 +
87 + Maximum size of vector arguments expressed in bytes. Can be modified.
88 +
89 + /sys/kernel/perfmon/mode (read-write):
90 +
91 + Bitmask to enable/disable certain perfmon2 features.
92 + Currently defined:
93 + - bit 0: if set, then reserved bitfields are ignored on PMC writes
94 diff --git a/Documentation/ABI/testing/sysfs-perfmon-fmt b/Documentation/ABI/testing/sysfs-perfmon-fmt
95 new file mode 100644
96 index 0000000..1b45270
97 --- /dev/null
98 +++ b/Documentation/ABI/testing/sysfs-perfmon-fmt
99 @@ -0,0 +1,18 @@
100 +What: /sys/kernel/perfmon/formats
101 +Date: 2007
102 +KernelVersion: 2.6.24
103 +Contact: eranian@gmail.com
104 +
105 +Description: provide description of available perfmon2 custom sampling buffer formats
106 + which are implemented as independent kernel modules. Each format gets
107 + a subdir with a few entries.
108 +
109 + The name of the subdir is the name of the sampling format. The same name
110 + must be passed to pfm_create_context() to use the format.
111 +
112 + Each subdir XX contains the following entries:
113 +
114 + /sys/kernel/perfmon/formats/XX/version (read-only):
115 +
116 + Version number of the format in clear text and null terminated.
117 +
118 diff --git a/Documentation/ABI/testing/sysfs-perfmon-pmu b/Documentation/ABI/testing/sysfs-perfmon-pmu
119 new file mode 100644
120 index 0000000..a1afc7e
121 --- /dev/null
122 +++ b/Documentation/ABI/testing/sysfs-perfmon-pmu
123 @@ -0,0 +1,46 @@
124 +What: /sys/kernel/perfmon/pmu_desc
125 +Date: Nov 2007
126 +KernelVersion: 2.6.24
127 +Contact: eranian@gmail.com
128 +
129 +Description: provide information about the currently loaded PMU description module.
130 + The module contains the mapping of the actual performance counter registers
131 + onto the logical PMU exposed by perfmon. There is at most one PMU description
132 + module loaded at any time.
133 +
134 + The sysfs PMU tree provides a description of the mapping for each register.
135 + There is one subdir per config and data register along with an entry for the
136 + name of the PMU model.
137 +
138 + The model entry is as follows:
139 +
140 + /sys/kernel/perfmon/pmu_desc/model (read-only):
141 +
142 + Name of the PMU model in clear text and zero terminated.
143 +
144 + Then each logical PMU register, XX, gets a subtree with the following entries:
145 +
146 + /sys/kernel/perfmon/pmu_desc/pm*XX/addr (read-only):
147 +
148 + The physical address or index of the actual underlying hardware register.
149 + On Itanium, it corresponds to the index. But on X86 processors, this is
150 + the actual MSR address.
151 +
152 + /sys/kernel/perfmon/pmu_desc/pm*XX/dfl_val (read-only):
153 +
154 + The default value of the register in hexadecimal.
155 +
156 + /sys/kernel/perfmon/pmu_desc/pm*XX/name (read-only):
157 +
158 + The name of the hardware register.
159 +
160 + /sys/kernel/perfmon/pmu_desc/pm*XX/rsvd_msk (read-only):
161 +
162 + The bitmask of reserved bits, i.e., bits which cannot be changed by
163 + applications. When a bit is set, it means the corresponding bit in the
164 + actual register is reserved.
165 +
166 + /sys/kernel/perfmon/pmu_desc/pm*XX/width (read-only):
167 +
168 + the width in bits of the registers. This field is only relevant for counter
169 + registers.
170 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
171 index 1150444..2652b6c 100644
172 --- a/Documentation/kernel-parameters.txt
173 +++ b/Documentation/kernel-parameters.txt
174 @@ -1643,6 +1643,9 @@ and is between 256 and 4096 characters. It is defined in the file
175 Format: { 0 | 1 }
176 See arch/parisc/kernel/pdc_chassis.c
177
178 + perfmon_debug [PERFMON] Enables Perfmon debug messages. Needed
179 + to see traces of the early startup phase.
180 +
181 pf. [PARIDE]
182 See Documentation/paride.txt.
183
184 diff --git a/Documentation/perfmon2-debugfs.txt b/Documentation/perfmon2-debugfs.txt
185 new file mode 100644
186 index 0000000..b30cae8
187 --- /dev/null
188 +++ b/Documentation/perfmon2-debugfs.txt
189 @@ -0,0 +1,126 @@
190 + The perfmon2 debug and statistics interface
191 + ------------------------------------------
192 + Stephane Eranian
193 + <eranian@gmail.com>
194 +
195 +The perfmon2 interface exports a set of statistics which are used to tune and
196 +debug the implementation. The data is composed of a set of very simple metrics,
197 +mostly aggregated counts and durations. They instrument key points in the
198 +perfmon2 code, such as context switch and interrupt handling.
199 +
200 +The data is accessible via the debug filesystem (debugfs). Thus you need to
201 +have the filesystem support enabled in your kernel. Furthermore, since 2.6.25,
202 +the perfmon2 statistics interface is an optional component. It needs to be
203 +explicitly enabled in the kernel config file (CONFIG_PERFMON_DEBUG_FS).
204 +
205 +To access the data, the debugfs filesystem must be mounted. Supposing the mount
206 +point is /debugfs, you would need to do:
207 + $ mount -t debugfs none /debugfs
208 +
209 +The data is located under the perfmon subdirectory and is organized per CPU.
210 +For each CPU, the same set of metrics is available, one metric per file in
211 +clear ASCII text.
212 +
213 +The metrics are as follows:
214 +
215 + ctxswin_count (read-only):
216 +
217 + Number of PMU context switch in.
218 +
219 + ctxswin_ns (read-only):
220 +
221 + Number of nanoseconds spent in the PMU context switch in
222 + routine. Dividing this number by the value of ctxswin_count,
223 + yields average cost of the PMU context switch in.
224 +
225 + ctxswout_count (read-only):
226 +
227 + Number of PMU context switch out.
228 +
229 + ctxswout_ns (read-only):
230 +
231 + Number of nanoseconds spent in the PMU context switch out
232 + routine. Dividing this number by the value of ctxswout_count,
233 + yields average cost of the PMU context switch out.
234 +
235 + fmt_handler_calls (read-only):
236 +
237 + Number of calls to the sampling format routine that handles
238 + PMU interrupts, i.e., typically the routine that records a
239 + sample.
240 +
241 + fmt_handler_ns (read-only):
242 +
243 + Number of nanoseconds spent in the routine that handles PMU
244 + interrupts in the sampling format. Dividing this number by
245 + the number of calls provided by fmt_handler_calls, yields
246 + average time spent in this routine.
247 +
248 + ovfl_intr_all_count (read-only):
249 +
250 + Number of PMU interrupts received by the kernel.
251 +
252 +
253 + ovfl_intr_nmi_count (read-only):
254 +
255 + Number of Non-Maskable Interrupts (NMI) received by the kernel
256 + for perfmon. This is relevant only on X86 hardware.
257 +
258 + ovfl_intr_ns (read-only):
259 +
260 + Number of nanoseconds spent in the perfmon2 PMU interrupt
261 + handler routine. Dividing this number by ovfl_intr_all_count
262 + yields the average time to handle one PMU interrupt.
263 +
264 + ovfl_intr_regular_count (read-only):
265 +
266 + Number of PMU interrupts which are actually processed by
267 + the perfmon interrupt handler. There may be spurious or replay
268 + interrupts.
269 +
270 + ovfl_intr_replay_count (read-only):
271 +
272 + Number of PMU interrupts which were replayed on context switch
273 + in or on event set switching. Interrupts get replayed when they
274 + were in flight at the time monitoring had to be stopped.
275 +
276 + ovfl_intr_spurious_count (read-only):
277 +
278 + Number of PMU interrupts which were dropped because there was
279 + no active context (session).
280 +
281 + ovfl_notify_count (read-only):
282 +
283 + Number of user level notifications sent. Notifications are
284 + appended as messages to the context queue. Notifications may
285 + be sent on PMU interrupts.
286 +
287 + pfm_restart_count (read-only):
288 +
289 + Number of times pfm_restart() is called.
290 +
291 + reset_pmds_count (read-only):
292 +
293 + Number of times pfm_reset_pmds() is called.
294 +
295 + set_switch_count (read-only):
296 +
297 + Number of event set switches.
298 +
299 + set_switch_ns (read-only):
300 +
301 + Number of nanoseconds spent in the set switching routine.
302 + Dividing this number by set_switch_count yields the average
303 + cost of switching sets.
304 +
305 + handle_timeout_count (read-only):
306 +
307 + Number of times the pfm_handle_timeout() routine is called.
308 + It is used for timeout-based set switching.
309 +
310 + handle_work_count (read-only):
311 +
312 + Number of times pfm_handle_work() is called. The routine
313 + handles asynchronous perfmon2 work for per-thread contexts
314 + (sessions).
315 +
316 diff --git a/Documentation/perfmon2.txt b/Documentation/perfmon2.txt
317 new file mode 100644
318 index 0000000..4a8fada
319 --- /dev/null
320 +++ b/Documentation/perfmon2.txt
321 @@ -0,0 +1,213 @@
322 + The perfmon2 hardware monitoring interface
323 + ------------------------------------------
324 + Stephane Eranian
325 + <eranian@gmail.com>
326 +
327 +I/ Introduction
328 +
329 + The perfmon2 interface provides access to the hardware performance counters of
330 + major processors. Nowadays, all processors implement some flavors of performance
331 + counters which capture micro-architectural level information such as the number
332 + of elapsed cycles, number of cache misses, and so on.
333 +
334 + The interface is implemented as a set of new system calls and a set of config files
335 + in /sys.
336 +
337 + It is possible to monitor a single thread or a CPU. In either mode, applications
338 + can count or collect samples. System-wide monitoring is supported by running a
339 + monitoring session on each CPU. The interface supports event-based sampling where the
340 + sampling period is expressed as the number of occurrences of an event, instead of just a
341 + timeout. This approach provides a much better granularity and flexibility.
342 +
343 + For performance reasons, it is possible to use a kernel-level sampling buffer to minimize
344 + the overhead incurred by sampling. The format of the buffer, i.e., what is recorded, how
345 + it is recorded, and how it is exported to user-land is controlled by a kernel module called
346 + a custom sampling format. The current implementation comes with a default format but
347 + it is possible to create additional formats. There is an in-kernel registration
348 + interface for formats. Each format is identified by a simple string which a tool
349 + can pass when a monitoring session is created.
350 +
351 + The interface also provides support for event set and multiplexing to work around
352 + hardware limitations in the number of available counters or in how events can be
353 + combined. Each set defines as many counters as the hardware can support. The kernel
354 + then multiplexes the sets. The interface supports time-based switching but also
355 + overflow based switching, i.e., after n overflows of designated counters.
356 +
357 + Applications never manipulate the actual performance counter registers. Instead they see
358 + a logical Performance Monitoring Unit (PMU) composed of a set of config registers (PMC)
359 + and a set of data registers (PMD). Note that PMD are not necessarily counters, they
360 + can be buffers. The logical PMU is then mapped onto the actual PMU using a mapping
361 + table which is implemented as a kernel module. The mapping is chosen once for each
362 + new processor. It is visible in /sys/kernel/perfmon/pmu_desc. The kernel module
363 + is automatically loaded on first use.
364 +
365 + A monitoring session, or context, is uniquely identified by a file descriptor
366 + obtained when the context is created. File sharing semantics apply to access
367 + the context inside a process. A context is never inherited across fork. The file
368 + descriptor can be used to receive counter overflow notifications or when the
369 + sampling buffer is full. It is possible to use poll/select on the descriptor
370 + to wait for notifications from multiple contexts. Similarly, the descriptor
371 + supports asynchronous notification via SIGIO.
372 +
373 + Counters are always exported as being 64-bit wide regardless of what the underlying
374 + hardware implements.
375 +
376 +II/ Kernel compilation
377 +
378 + To enable perfmon2, you need to enable CONFIG_PERFMON
379 +
380 +III/ OProfile interactions
381 +
382 + The set of features offered by perfmon2 is rich enough to support migrating
383 + Oprofile on top of it. That means that PMU programming and low-level interrupt
384 + handling could be done by perfmon2. The Oprofile sampling buffer management code
385 + in the kernel as well as how samples are exported to users could remain through
386 + the use of a custom sampling buffer format. This is how Oprofile works on Itanium.
387 +
388 + The current interactions with Oprofile are:
389 + - on X86: Both subsystems can be compiled into the same kernel. There is enforced
390 + mutual exclusion between the two subsystems. When there is an Oprofile
391 + session, no perfmon2 session can exist and vice-versa. Perfmon2 session
392 + encapsulates both per-thread and system-wide sessions here.
393 +
394 + - On IA-64: Oprofile works on top of perfmon2. Oprofile being a system-wide monitoring
395 + tool, the regular per-thread vs. system-wide session restrictions apply.
396 +
397 + - on PPC: no integration yet. You need to enable/disable one of the two subsystems
398 + - on MIPS: no integration yet. You need to enable/disable one of the two subsystems
399 +
400 +IV/ User tools
401 +
402 + We have released a simple monitoring tool to demonstrate the feature of the
403 + interface. The tool is called pfmon and it comes with a simple helper library
404 + called libpfm. The library comes with a set of examples to show how to use the
405 + kernel perfmon2 interface. Visit http://perfmon2.sf.net for details.
406 +
407 + There may be other tools available for perfmon2.
408 +
409 +V/ How to program?
410 +
411 + The best way to learn how to program perfmon2, is to take a look at the source
412 + code for the examples in libpfm. The source code is available from:
413 + http://perfmon2.sf.net
414 +
415 +VI/ System calls overview
416 +
417 + The interface is implemented by the following system calls:
418 +
419 + * int pfm_create_context(pfarg_ctx_t *ctx, char *fmt, void *arg, size_t arg_size)
420 +
421 + This function creates a perfmon2 context. The type of context is per-thread by
422 + default unless PFM_FL_SYSTEM_WIDE is passed in ctx. The sampling format name
423 + is passed in fmt. Arguments to the format are passed in arg which is of size
424 + arg_size. Upon successful return, the file descriptor identifying the context
425 + is returned.
426 +
427 + * int pfm_write_pmds(int fd, pfarg_pmd_t *pmds, int n)
428 +
429 + This function is used to program the PMD registers. It is possible to pass
430 + vectors of PMDs.
431 +
432 + * int pfm_write_pmcs(int fd, pfarg_pmc_t *pmcs, int n)
433 +
434 + This function is used to program the PMC registers. It is possible to pass
435 + vectors of PMCs.
436 +
437 + * int pfm_read_pmds(int fd, pfarg_pmd_t *pmds, int n)
438 +
439 + This function is used to read the PMD registers. It is possible to pass
440 + vectors of PMDs.
441 +
442 + * int pfm_load_context(int fd, pfarg_load_t *load)
443 +
444 + This function is used to attach the context to a thread or CPU.
445 + Thread means kernel-visible thread (NPTL). The thread identification
446 + as obtained by gettid must be passed to load->load_target.
447 +
448 + To operate on another thread (not self), it is mandatory that the thread
449 + be stopped via ptrace().
450 +
451 + To attach to a CPU, the CPU number must be specified in load->load_target
452 + AND the call must be issued on that CPU. To monitor a CPU, a thread MUST
453 + be pinned on that CPU.
454 +
455 + Until the context is attached, the actual counters are not accessed.
456 +
457 + * int pfm_unload_context(int fd)
458 +
459 + The context is detached from the thread or CPU it was attached to.
460 + As a consequence monitoring is stopped.
461 +
462 + When monitoring another thread, the thread MUST be stopped via ptrace()
463 + for this function to succeed.
464 +
465 + * int pfm_start(int fd, pfarg_start_t *st)
466 +
467 + Start monitoring. The context must be attached for this function to succeed.
468 + Optionally, it is possible to specify the event set on which to start using the
469 + st argument, otherwise just pass NULL.
470 +
471 + When monitoring another thread, the thread MUST be stopped via ptrace()
472 + for this function to succeed.
473 +
474 + * int pfm_stop(int fd)
475 +
476 + Stop monitoring. The context must be attached for this function to succeed.
477 +
478 + When monitoring another thread, the thread MUST be stopped via ptrace()
479 + for this function to succeed.
480 +
481 +
482 + * int pfm_create_evtsets(int fd, pfarg_setdesc_t *sets, int n)
483 +
484 + This function is used to create or change event sets. By default set 0 exists.
485 + It is possible to create/change multiple sets in one call.
486 +
487 + The context must be detached for this call to succeed.
488 +
489 + Sets are identified by a 16-bit integer. They are sorted based on this
490 + set and switching occurs in a round-robin fashion.
491 +
492 + * int pfm_delete_evtsets(int fd, pfarg_setdesc_t *sets, int n)
493 +
494 + Delete event sets. The context must be detached for this call to succeed.
495 +
496 +
497 + * int pfm_getinfo_evtsets(int fd, pfarg_setinfo_t *sets, int n)
498 +
499 + Retrieve information about event sets. In particular it is possible
500 + to get the number of activations of a set. It is possible to retrieve
501 + information about multiple sets in one call.
502 +
503 +
504 + * int pfm_restart(int fd)
505 +
506 + Indicate to the kernel that the application is done processing an overflow
507 + notification. A consequence of this call could be that monitoring resumes.
508 +
509 + * int read(fd, pfm_msg_t *msg, sizeof(pfm_msg_t))
510 +
511 + the regular read() system call can be used with the context file descriptor to
512 + receive overflow notification messages. Non-blocking read() is supported.
513 +
514 + Each message carries information about the overflow such as which counter overflowed
515 + and where the program was (interrupted instruction pointer).
516 +
517 + * int close(int fd)
518 +
519 + To destroy a context, the regular close() system call is used.
520 +
521 +
522 +VII/ /sys interface overview
523 +
524 + Refer to Documentation/ABI/testing/sysfs-perfmon-* for a detailed description
525 + of the sysfs interface of perfmon2.
526 +
527 +VIII/ debugfs interface overview
528 +
529 + Refer to Documentation/perfmon2-debugfs.txt for a detailed description of the
530 + debug and statistics interface of perfmon2.
531 +
532 +IX/ Documentation
533 +
534 + Visit http://perfmon2.sf.net
535 diff --git a/MAINTAINERS b/MAINTAINERS
536 index 8dae455..fb38c2a 100644
537 --- a/MAINTAINERS
538 +++ b/MAINTAINERS
539 @@ -3239,6 +3239,14 @@ M: balbir@linux.vnet.ibm.com
540 L: linux-kernel@vger.kernel.org
541 S: Maintained
542
543 +PERFMON SUBSYSTEM
544 +P: Stephane Eranian
545 +M: eranian@gmail.com
546 +L: perfmon2-devel@lists.sf.net
547 +W: http://perfmon2.sf.net
548 +T: git kernel.org:/pub/scm/linux/kernel/git/eranian/linux-2.6
549 +S: Maintained
550 +
551 PERSONALITY HANDLING
552 P: Christoph Hellwig
553 M: hch@infradead.org
554 diff --git a/Makefile b/Makefile
555 index 16e3fbb..7bb1320 100644
556 --- a/Makefile
557 +++ b/Makefile
558 @@ -620,6 +620,7 @@ export mod_strip_cmd
559
560 ifeq ($(KBUILD_EXTMOD),)
561 core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
562 +core-$(CONFIG_PERFMON) += perfmon/
563
564 vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
565 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
566 diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
567 index 48e496f..1d79b01 100644
568 --- a/arch/ia64/Kconfig
569 +++ b/arch/ia64/Kconfig
570 @@ -470,14 +470,6 @@ config COMPAT_FOR_U64_ALIGNMENT
571 config IA64_MCA_RECOVERY
572 tristate "MCA recovery from errors other than TLB."
573
574 -config PERFMON
575 - bool "Performance monitor support"
576 - help
577 - Selects whether support for the IA-64 performance monitor hardware
578 - is included in the kernel. This makes some kernel data-structures a
579 - little bigger and slows down execution a bit, but it is generally
580 - a good idea to turn this on. If you're unsure, say Y.
581 -
582 config IA64_PALINFO
583 tristate "/proc/pal support"
584 help
585 @@ -549,6 +541,8 @@ source "drivers/firmware/Kconfig"
586
587 source "fs/Kconfig.binfmt"
588
589 +source "arch/ia64/perfmon/Kconfig"
590 +
591 endmenu
592
593 menu "Power management and ACPI"
594 diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
595 index 905d25b..9aa622d 100644
596 --- a/arch/ia64/Makefile
597 +++ b/arch/ia64/Makefile
598 @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
599 core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
600 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
601 core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
602 +core-$(CONFIG_PERFMON) += arch/ia64/perfmon/
603 core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
604 core-$(CONFIG_KVM) += arch/ia64/kvm/
605
606 diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
607 index 9f48397..ff9572a 100644
608 --- a/arch/ia64/configs/generic_defconfig
609 +++ b/arch/ia64/configs/generic_defconfig
610 @@ -209,7 +209,6 @@ CONFIG_IA32_SUPPORT=y
611 CONFIG_COMPAT=y
612 CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
613 CONFIG_IA64_MCA_RECOVERY=y
614 -CONFIG_PERFMON=y
615 CONFIG_IA64_PALINFO=y
616 # CONFIG_IA64_MC_ERR_INJECT is not set
617 CONFIG_SGI_SN=y
618 @@ -234,6 +233,16 @@ CONFIG_BINFMT_ELF=y
619 CONFIG_BINFMT_MISC=m
620
621 #
622 +# Hardware Performance Monitoring support
623 +#
624 +CONFIG_PERFMON=y
625 +CONFIG_IA64_PERFMON_COMPAT=y
626 +CONFIG_IA64_PERFMON_GENERIC=m
627 +CONFIG_IA64_PERFMON_ITANIUM=y
628 +CONFIG_IA64_PERFMON_MCKINLEY=y
629 +CONFIG_IA64_PERFMON_MONTECITO=y
630 +
631 +#
632 # Power management and ACPI
633 #
634 CONFIG_PM=y
635 diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
636 index ccbe8ae..cf64b3b 100644
637 --- a/arch/ia64/include/asm/Kbuild
638 +++ b/arch/ia64/include/asm/Kbuild
639 @@ -5,10 +5,12 @@ header-y += fpu.h
640 header-y += fpswa.h
641 header-y += ia64regs.h
642 header-y += intel_intrin.h
643 -header-y += perfmon_default_smpl.h
644 header-y += ptrace_offsets.h
645 header-y += rse.h
646 header-y += ucontext.h
647 +header-y += perfmon.h
648 +header-y += perfmon_compat.h
649 +header-y += perfmon_default_smpl.h
650
651 unifdef-y += gcc_intrin.h
652 unifdef-y += intrinsics.h
653 diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h
654 index 5c99cbc..4a45cb0 100644
655 --- a/arch/ia64/include/asm/hw_irq.h
656 +++ b/arch/ia64/include/asm/hw_irq.h
657 @@ -67,9 +67,9 @@ extern int ia64_last_device_vector;
658 #define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1)
659
660 #define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */
661 -#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */
662 #define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */
663 #define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */
664 +#define IA64_PERFMON_VECTOR 0xf1 /* performance monitor interrupt vector */
665 #define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */
666 #define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */
667 #define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */
668 diff --git a/arch/ia64/include/asm/perfmon.h b/arch/ia64/include/asm/perfmon.h
669 index 7f3333d..150c4b4 100644
670 --- a/arch/ia64/include/asm/perfmon.h
671 +++ b/arch/ia64/include/asm/perfmon.h
672 @@ -1,279 +1,59 @@
673 /*
674 - * Copyright (C) 2001-2003 Hewlett-Packard Co
675 - * Stephane Eranian <eranian@hpl.hp.com>
676 - */
677 -
678 -#ifndef _ASM_IA64_PERFMON_H
679 -#define _ASM_IA64_PERFMON_H
680 -
681 -/*
682 - * perfmon comamnds supported on all CPU models
683 - */
684 -#define PFM_WRITE_PMCS 0x01
685 -#define PFM_WRITE_PMDS 0x02
686 -#define PFM_READ_PMDS 0x03
687 -#define PFM_STOP 0x04
688 -#define PFM_START 0x05
689 -#define PFM_ENABLE 0x06 /* obsolete */
690 -#define PFM_DISABLE 0x07 /* obsolete */
691 -#define PFM_CREATE_CONTEXT 0x08
692 -#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */
693 -#define PFM_RESTART 0x0a
694 -#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
695 -#define PFM_GET_FEATURES 0x0c
696 -#define PFM_DEBUG 0x0d
697 -#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
698 -#define PFM_GET_PMC_RESET_VAL 0x0f
699 -#define PFM_LOAD_CONTEXT 0x10
700 -#define PFM_UNLOAD_CONTEXT 0x11
701 -
702 -/*
703 - * PMU model specific commands (may not be supported on all PMU models)
704 - */
705 -#define PFM_WRITE_IBRS 0x20
706 -#define PFM_WRITE_DBRS 0x21
707 -
708 -/*
709 - * context flags
710 - */
711 -#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */
712 -#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
713 -#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */
714 -
715 -/*
716 - * event set flags
717 - */
718 -#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */
719 -
720 -/*
721 - * PMC flags
722 - */
723 -#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */
724 -#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */
725 -
726 -/*
727 - * PMD/PMC/IBR/DBR return flags (ignored on input)
728 + * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
729 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
730 *
731 - * Those flags are used on output and must be checked in case EAGAIN is returned
732 - * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
733 - */
734 -#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */
735 -#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */
736 -#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
737 -
738 -#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0)
739 -
740 -typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */
741 -
742 -/*
743 - * Request structure used to define a context
744 - */
745 -typedef struct {
746 - pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */
747 - unsigned long ctx_flags; /* noblock/block */
748 - unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */
749 - unsigned short ctx_reserved1; /* for future use */
750 - int ctx_fd; /* return arg: unique identification for context */
751 - void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */
752 - unsigned long ctx_reserved2[11];/* for future use */
753 -} pfarg_context_t;
754 -
755 -/*
756 - * Request structure used to write/read a PMC or PMD
757 - */
758 -typedef struct {
759 - unsigned int reg_num; /* which register */
760 - unsigned short reg_set; /* event set for this register */
761 - unsigned short reg_reserved1; /* for future use */
762 -
763 - unsigned long reg_value; /* initial pmc/pmd value */
764 - unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */
765 -
766 - unsigned long reg_long_reset; /* reset after buffer overflow notification */
767 - unsigned long reg_short_reset; /* reset after counter overflow */
768 -
769 - unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */
770 - unsigned long reg_random_seed; /* seed value when randomization is used */
771 - unsigned long reg_random_mask; /* bitmask used to limit random value */
772 - unsigned long reg_last_reset_val;/* return: PMD last reset value */
773 -
774 - unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */
775 - unsigned long reg_smpl_eventid; /* opaque sampling event identifier */
776 -
777 - unsigned long reg_reserved2[3]; /* for future use */
778 -} pfarg_reg_t;
779 -
780 -typedef struct {
781 - unsigned int dbreg_num; /* which debug register */
782 - unsigned short dbreg_set; /* event set for this register */
783 - unsigned short dbreg_reserved1; /* for future use */
784 - unsigned long dbreg_value; /* value for debug register */
785 - unsigned long dbreg_flags; /* return: dbreg error */
786 - unsigned long dbreg_reserved2[1]; /* for future use */
787 -} pfarg_dbreg_t;
788 -
789 -typedef struct {
790 - unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */
791 - unsigned int ft_reserved; /* reserved for future use */
792 - unsigned long reserved[4]; /* for future use */
793 -} pfarg_features_t;
794 -
795 -typedef struct {
796 - pid_t load_pid; /* process to load the context into */
797 - unsigned short load_set; /* first event set to load */
798 - unsigned short load_reserved1; /* for future use */
799 - unsigned long load_reserved2[3]; /* for future use */
800 -} pfarg_load_t;
801 -
802 -typedef struct {
803 - int msg_type; /* generic message header */
804 - int msg_ctx_fd; /* generic message header */
805 - unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
806 - unsigned short msg_active_set; /* active set at the time of overflow */
807 - unsigned short msg_reserved1; /* for future use */
808 - unsigned int msg_reserved2; /* for future use */
809 - unsigned long msg_tstamp; /* for perf tuning/debug */
810 -} pfm_ovfl_msg_t;
811 -
812 -typedef struct {
813 - int msg_type; /* generic message header */
814 - int msg_ctx_fd; /* generic message header */
815 - unsigned long msg_tstamp; /* for perf tuning */
816 -} pfm_end_msg_t;
817 -
818 -typedef struct {
819 - int msg_type; /* type of the message */
820 - int msg_ctx_fd; /* unique identifier for the context */
821 - unsigned long msg_tstamp; /* for perf tuning */
822 -} pfm_gen_msg_t;
823 -
824 -#define PFM_MSG_OVFL 1 /* an overflow happened */
825 -#define PFM_MSG_END 2 /* task to which context was attached ended */
826 -
827 -typedef union {
828 - pfm_ovfl_msg_t pfm_ovfl_msg;
829 - pfm_end_msg_t pfm_end_msg;
830 - pfm_gen_msg_t pfm_gen_msg;
831 -} pfm_msg_t;
832 -
833 -/*
834 - * Define the version numbers for both perfmon as a whole and the sampling buffer format.
835 + * This file contains Itanium Processor Family specific definitions
836 + * for the perfmon interface.
837 + *
838 + * This program is free software; you can redistribute it and/or
839 + * modify it under the terms of version 2 of the GNU General Public
840 + * License as published by the Free Software Foundation.
841 + *
842 + * This program is distributed in the hope that it will be useful,
843 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
844 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
845 + * General Public License for more details.
846 + *
847 + * You should have received a copy of the GNU General Public License
848 + * along with this program; if not, write to the Free Software
849 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
850 + * 02111-1307 USA
851 */
852 -#define PFM_VERSION_MAJ 2U
853 -#define PFM_VERSION_MIN 0U
854 -#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
855 -#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
856 -#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
857 -
858 +#ifndef _ASM_IA64_PERFMON_H_
859 +#define _ASM_IA64_PERFMON_H_
860
861 /*
862 - * miscellaneous architected definitions
863 + * arch-specific user visible interface definitions
864 */
865 -#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */
866 -#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */
867 -#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */
868 -
869 -#ifdef __KERNEL__
870 -
871 -extern long perfmonctl(int fd, int cmd, void *arg, int narg);
872 -
873 -typedef struct {
874 - void (*handler)(int irq, void *arg, struct pt_regs *regs);
875 -} pfm_intr_handler_desc_t;
876 -
877 -extern void pfm_save_regs (struct task_struct *);
878 -extern void pfm_load_regs (struct task_struct *);
879
880 -extern void pfm_exit_thread(struct task_struct *);
881 -extern int pfm_use_debug_registers(struct task_struct *);
882 -extern int pfm_release_debug_registers(struct task_struct *);
883 -extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
884 -extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
885 -extern void pfm_init_percpu(void);
886 -extern void pfm_handle_work(void);
887 -extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
888 -extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
889 +#define PFM_ARCH_MAX_PMCS (256+64)
890 +#define PFM_ARCH_MAX_PMDS (256+64)
891
892 -
893 -
894 -/*
895 - * Reset PMD register flags
896 - */
897 -#define PFM_PMD_SHORT_RESET 0
898 -#define PFM_PMD_LONG_RESET 1
899 -
900 -typedef union {
901 - unsigned int val;
902 - struct {
903 - unsigned int notify_user:1; /* notify user program of overflow */
904 - unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */
905 - unsigned int block_task:1; /* block monitored task on kernel exit */
906 - unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
907 - unsigned int reserved:28; /* for future use */
908 - } bits;
909 -} pfm_ovfl_ctrl_t;
910 -
911 -typedef struct {
912 - unsigned char ovfl_pmd; /* index of overflowed PMD */
913 - unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */
914 - unsigned short active_set; /* event set active at the time of the overflow */
915 - pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */
916 -
917 - unsigned long pmd_last_reset; /* last reset value of of the PMD */
918 - unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */
919 - unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
920 - unsigned long pmd_value; /* current 64-bit value of the PMD */
921 - unsigned long pmd_eventid; /* eventid associated with PMD */
922 -} pfm_ovfl_arg_t;
923 -
924 -
925 -typedef struct {
926 - char *fmt_name;
927 - pfm_uuid_t fmt_uuid;
928 - size_t fmt_arg_size;
929 - unsigned long fmt_flags;
930 -
931 - int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
932 - int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
933 - int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
934 - int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
935 - int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
936 - int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
937 - int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
938 -
939 - struct list_head fmt_list;
940 -} pfm_buffer_fmt_t;
941 -
942 -extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
943 -extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
944 +#define PFM_ARCH_PMD_STK_ARG 8
945 +#define PFM_ARCH_PMC_STK_ARG 8
946
947 /*
948 - * perfmon interface exported to modules
949 + * Itanium specific context flags
950 + *
951 + * bits[00-15]: generic flags (see asm/perfmon.h)
952 + * bits[16-31]: arch-specific flags
953 */
954 -extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
955 -extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
956 -extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
957 -extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
958 +#define PFM_ITA_FL_INSECURE 0x10000 /* clear psr.sp on non system, non self */
959
960 /*
961 - * describe the content of the local_cpu_date->pfm_syst_info field
962 + * Itanium specific public event set flags (set_flags)
963 + *
964 + * event set flags layout:
965 + * bits[00-15] : generic flags
966 + * bits[16-31] : arch-specific flags
967 */
968 -#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */
969 -#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */
970 -#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */
971 +#define PFM_ITA_SETFL_EXCL_INTR 0x10000 /* exclude interrupt execution */
972 +#define PFM_ITA_SETFL_INTR_ONLY 0x20000 /* include only interrupt execution */
973 +#define PFM_ITA_SETFL_IDLE_EXCL 0x40000 /* stop monitoring in idle loop */
974
975 /*
976 - * sysctl control structure. visible to sampling formats
977 + * compatibility for version v2.0 of the interface
978 */
979 -typedef struct {
980 - int debug; /* turn on/off debugging via syslog */
981 - int debug_ovfl; /* turn on/off debug printk in overflow handler */
982 - int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
983 - int expert_mode; /* turn on/off value checking */
984 -} pfm_sysctl_t;
985 -extern pfm_sysctl_t pfm_sysctl;
986 -
987 -
988 -#endif /* __KERNEL__ */
989 +#include <asm/perfmon_compat.h>
990
991 -#endif /* _ASM_IA64_PERFMON_H */
992 +#endif /* _ASM_IA64_PERFMON_H_ */
993 diff --git a/arch/ia64/include/asm/perfmon_compat.h b/arch/ia64/include/asm/perfmon_compat.h
994 new file mode 100644
995 index 0000000..5c14514
996 --- /dev/null
997 +++ b/arch/ia64/include/asm/perfmon_compat.h
998 @@ -0,0 +1,167 @@
999 +/*
1000 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
1001 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
1002 + *
1003 + * This header file contains perfmon interface definitions
1004 + * that are now obsolete and should be dropped in favor
1005 + * of their equivalent functions as explained below.
1006 + *
1007 + * This program is free software; you can redistribute it and/or
1008 + * modify it under the terms of version 2 of the GNU General Public
1009 + * License as published by the Free Software Foundation.
1010 + *
1011 + * This program is distributed in the hope that it will be useful,
1012 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1013 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1014 + * General Public License for more details.
1015 + *
1016 + * You should have received a copy of the GNU General Public License
1017 + * along with this program; if not, write to the Free Software
1018 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1019 + * 02111-1307 USA
1020 + */
1021 +
1022 +#ifndef _ASM_IA64_PERFMON_COMPAT_H_
1023 +#define _ASM_IA64_PERFMON_COMPAT_H_
1024 +
1025 +/*
1026 + * custom sampling buffer identifier type
1027 + */
1028 +typedef __u8 pfm_uuid_t[16];
1029 +
1030 +/*
1031 + * obsolete perfmon commands. Supported only on IA-64 for
1032 + * backward compatibility reasons with perfmon v2.0.
1033 + */
1034 +#define PFM_WRITE_PMCS 0x01 /* use pfm_write_pmcs */
1035 +#define PFM_WRITE_PMDS 0x02 /* use pfm_write_pmds */
1036 +#define PFM_READ_PMDS 0x03 /* use pfm_read_pmds */
1037 +#define PFM_STOP 0x04 /* use pfm_stop */
1038 +#define PFM_START 0x05 /* use pfm_start */
1039 +#define PFM_ENABLE 0x06 /* obsolete */
1040 +#define PFM_DISABLE 0x07 /* obsolete */
1041 +#define PFM_CREATE_CONTEXT 0x08 /* use pfm_create_context */
1042 +#define PFM_DESTROY_CONTEXT 0x09 /* use close() */
1043 +#define PFM_RESTART 0x0a /* use pfm_restart */
1044 +#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
1045 +#define PFM_GET_FEATURES 0x0c /* use /proc/sys/perfmon */
1046 +#define PFM_DEBUG 0x0d /* /proc/sys/kernel/perfmon/debug */
1047 +#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
1048 +#define PFM_GET_PMC_RESET_VAL 0x0f /* use /proc/perfmon_map */
1049 +#define PFM_LOAD_CONTEXT 0x10 /* use pfm_load_context */
1050 +#define PFM_UNLOAD_CONTEXT 0x11 /* use pfm_unload_context */
1051 +
1052 +/*
1053 + * PMU model specific commands (may not be supported on all PMU models)
1054 + */
1055 +#define PFM_WRITE_IBRS 0x20 /* obsolete: use PFM_WRITE_PMCS[256-263]*/
1056 +#define PFM_WRITE_DBRS 0x21 /* obsolete: use PFM_WRITE_PMCS[264-271]*/
1057 +
1058 +/*
1059 + * argument to PFM_CREATE_CONTEXT
1060 + */
1061 +struct pfarg_context {
1062 + pfm_uuid_t ctx_smpl_buf_id; /* buffer format to use */
1063 + unsigned long ctx_flags; /* noblock/block */
1064 + unsigned int ctx_reserved1; /* for future use */
1065 + int ctx_fd; /* return: fildesc */
1066 + void *ctx_smpl_vaddr; /* return: vaddr of buffer */
1067 + unsigned long ctx_reserved3[11];/* for future use */
1068 +};
1069 +
1070 +/*
1071 + * argument structure for PFM_WRITE_PMCS/PFM_WRITE_PMDS/PFM_READ_PMDS
1072 + */
1073 +struct pfarg_reg {
1074 + unsigned int reg_num; /* which register */
1075 + unsigned short reg_set; /* event set for this register */
1076 + unsigned short reg_reserved1; /* for future use */
1077 +
1078 + unsigned long reg_value; /* initial pmc/pmd value */
1079 + unsigned long reg_flags; /* input: flags, ret: error */
1080 +
1081 + unsigned long reg_long_reset; /* reset value after notification */
1082 + unsigned long reg_short_reset; /* reset after counter overflow */
1083 +
1084 + unsigned long reg_reset_pmds[4]; /* registers to reset on overflow */
1085 + unsigned long reg_random_seed; /* seed for randomization */
1086 + unsigned long reg_random_mask; /* random range limit */
1087 + unsigned long reg_last_reset_val;/* return: PMD last reset value */
1088 +
1089 + unsigned long reg_smpl_pmds[4]; /* pmds to be saved on overflow */
1090 + unsigned long reg_smpl_eventid; /* opaque sampling event id */
1091 + unsigned long reg_ovfl_switch_cnt;/* #overflows to switch */
1092 +
1093 + unsigned long reg_reserved2[2]; /* for future use */
1094 +};
1095 +
1096 +/*
1097 + * argument to PFM_WRITE_IBRS/PFM_WRITE_DBRS
1098 + */
1099 +struct pfarg_dbreg {
1100 + unsigned int dbreg_num; /* which debug register */
1101 + unsigned short dbreg_set; /* event set */
1102 + unsigned short dbreg_reserved1; /* for future use */
1103 + unsigned long dbreg_value; /* value for debug register */
1104 + unsigned long dbreg_flags; /* return: dbreg error */
1105 + unsigned long dbreg_reserved2[1]; /* for future use */
1106 +};
1107 +
1108 +/*
1109 + * argument to PFM_GET_FEATURES
1110 + */
1111 +struct pfarg_features {
1112 + unsigned int ft_version; /* major [16-31], minor [0-15] */
1113 + unsigned int ft_reserved; /* reserved for future use */
1114 + unsigned long reserved[4]; /* for future use */
1115 +};
1116 +
1117 +typedef struct {
1118 + int msg_type; /* generic message header */
1119 + int msg_ctx_fd; /* generic message header */
1120 + unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
1121 + unsigned short msg_active_set; /* active set on overflow */
1122 + unsigned short msg_reserved1; /* for future use */
1123 + unsigned int msg_reserved2; /* for future use */
1124 + unsigned long msg_tstamp; /* for perf tuning/debug */
1125 +} pfm_ovfl_msg_t;
1126 +
1127 +typedef struct {
1128 + int msg_type; /* generic message header */
1129 + int msg_ctx_fd; /* generic message header */
1130 + unsigned long msg_tstamp; /* for perf tuning */
1131 +} pfm_end_msg_t;
1132 +
1133 +typedef struct {
1134 + int msg_type; /* type of the message */
1135 + int msg_ctx_fd; /* context file descriptor */
1136 + unsigned long msg_tstamp; /* for perf tuning */
1137 +} pfm_gen_msg_t;
1138 +
1139 +typedef union {
1140 + int type;
1141 + pfm_ovfl_msg_t pfm_ovfl_msg;
1142 + pfm_end_msg_t pfm_end_msg;
1143 + pfm_gen_msg_t pfm_gen_msg;
1144 +} pfm_msg_t;
1145 +
1146 +/*
1147 + * PMD/PMC return flags in case of error (ignored on input)
1148 + *
1149 + * reg_flags layout:
1150 + * bit 00-15 : generic flags
1151 + * bits[16-23] : arch-specific flags (see asm/perfmon.h)
1152 + * bit 24-31 : error codes
1153 + *
1154 + * Those flags are used on output and must be checked in case EINVAL is
1155 + * returned by a command accepting a vector of values and each has a flag
1156 + * field, such as pfarg_reg or pfarg_dbreg
1157 + */
1158 +#define PFM_REG_RETFL_NOTAVAIL (1<<31) /* not implemented or unaccessible */
1159 +#define PFM_REG_RETFL_EINVAL (1<<30) /* entry is invalid */
1160 +#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|\
1161 + PFM_REG_RETFL_EINVAL)
1162 +
1163 +#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0)
1164 +
1165 +#endif /* _ASM_IA64_PERFMON_COMPAT_H_ */
1166 diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h
1167 index 48822c0..8234f32 100644
1168 --- a/arch/ia64/include/asm/perfmon_default_smpl.h
1169 +++ b/arch/ia64/include/asm/perfmon_default_smpl.h
1170 @@ -1,83 +1,106 @@
1171 /*
1172 - * Copyright (C) 2002-2003 Hewlett-Packard Co
1173 - * Stephane Eranian <eranian@hpl.hp.com>
1174 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
1175 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
1176 *
1177 - * This file implements the default sampling buffer format
1178 - * for Linux/ia64 perfmon subsystem.
1179 + * This file implements the old default sampling buffer format
1180 + * for the perfmon2 subsystem. For IA-64 only.
1181 + *
1182 + * It requires the use of the perfmon_compat.h header. It is recommended
1183 + * that applications be ported to the new format instead.
1184 + *
1185 + * This program is free software; you can redistribute it and/or
1186 + * modify it under the terms of version 2 of the GNU General Public
1187 + * License as published by the Free Software Foundation.
1188 + *
1189 + * This program is distributed in the hope that it will be useful,
1190 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1191 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1192 + * General Public License for more details.
1193 + *
1194 + * You should have received a copy of the GNU General Public License
1195 + * along with this program; if not, write to the Free Software
1196 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1197 + * 02111-1307 USA
1198 */
1199 -#ifndef __PERFMON_DEFAULT_SMPL_H__
1200 -#define __PERFMON_DEFAULT_SMPL_H__ 1
1201 +#ifndef __ASM_IA64_PERFMON_DEFAULT_SMPL_H__
1202 +#define __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ 1
1203 +
1204 +#ifndef __ia64__
1205 +#error "this file must be used for compatibility reasons only on IA-64"
1206 +#endif
1207
1208 #define PFM_DEFAULT_SMPL_UUID { \
1209 - 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
1210 + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
1211 + 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
1212
1213 /*
1214 * format specific parameters (passed at context creation)
1215 */
1216 -typedef struct {
1217 +struct pfm_default_smpl_arg {
1218 unsigned long buf_size; /* size of the buffer in bytes */
1219 unsigned int flags; /* buffer specific flags */
1220 unsigned int res1; /* for future use */
1221 unsigned long reserved[2]; /* for future use */
1222 -} pfm_default_smpl_arg_t;
1223 +};
1224
1225 /*
1226 * combined context+format specific structure. Can be passed
1227 - * to PFM_CONTEXT_CREATE
1228 + * to PFM_CONTEXT_CREATE (not PFM_CONTEXT_CREATE2)
1229 */
1230 -typedef struct {
1231 - pfarg_context_t ctx_arg;
1232 - pfm_default_smpl_arg_t buf_arg;
1233 -} pfm_default_smpl_ctx_arg_t;
1234 +struct pfm_default_smpl_ctx_arg {
1235 + struct pfarg_context ctx_arg;
1236 + struct pfm_default_smpl_arg buf_arg;
1237 +};
1238
1239 /*
1240 * This header is at the beginning of the sampling buffer returned to the user.
1241 * It is directly followed by the first record.
1242 */
1243 -typedef struct {
1244 - unsigned long hdr_count; /* how many valid entries */
1245 - unsigned long hdr_cur_offs; /* current offset from top of buffer */
1246 - unsigned long hdr_reserved2; /* reserved for future use */
1247 +struct pfm_default_smpl_hdr {
1248 + u64 hdr_count; /* how many valid entries */
1249 + u64 hdr_cur_offs; /* current offset from top of buffer */
1250 + u64 hdr_reserved2; /* reserved for future use */
1251
1252 - unsigned long hdr_overflows; /* how many times the buffer overflowed */
1253 - unsigned long hdr_buf_size; /* how many bytes in the buffer */
1254 + u64 hdr_overflows; /* how many times the buffer overflowed */
1255 + u64 hdr_buf_size; /* how many bytes in the buffer */
1256
1257 - unsigned int hdr_version; /* contains perfmon version (smpl format diffs) */
1258 - unsigned int hdr_reserved1; /* for future use */
1259 - unsigned long hdr_reserved[10]; /* for future use */
1260 -} pfm_default_smpl_hdr_t;
1261 + u32 hdr_version; /* smpl format version */
1262 + u32 hdr_reserved1; /* for future use */
1263 + u64 hdr_reserved[10]; /* for future use */
1264 +};
1265
1266 /*
1267 * Entry header in the sampling buffer. The header is directly followed
1268 - * with the values of the PMD registers of interest saved in increasing
1269 - * index order: PMD4, PMD5, and so on. How many PMDs are present depends
1270 + * with the values of the PMD registers of interest saved in increasing
1271 + * index order: PMD4, PMD5, and so on. How many PMDs are present depends
1272 * on how the session was programmed.
1273 *
1274 * In the case where multiple counters overflow at the same time, multiple
1275 * entries are written consecutively.
1276 *
1277 - * last_reset_value member indicates the initial value of the overflowed PMD.
1278 + * last_reset_value member indicates the initial value of the overflowed PMD.
1279 */
1280 -typedef struct {
1281 - int pid; /* thread id (for NPTL, this is gettid()) */
1282 - unsigned char reserved1[3]; /* reserved for future use */
1283 - unsigned char ovfl_pmd; /* index of overflowed PMD */
1284 -
1285 - unsigned long last_reset_val; /* initial value of overflowed PMD */
1286 - unsigned long ip; /* where did the overflow interrupt happened */
1287 - unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */
1288 -
1289 - unsigned short cpu; /* cpu on which the overfow occured */
1290 - unsigned short set; /* event set active when overflow ocurred */
1291 - int tgid; /* thread group id (for NPTL, this is getpid()) */
1292 -} pfm_default_smpl_entry_t;
1293 +struct pfm_default_smpl_entry {
1294 + pid_t pid; /* thread id (for NPTL, this is gettid()) */
1295 + uint8_t reserved1[3]; /* for future use */
1296 + uint8_t ovfl_pmd; /* overflow pmd for this sample */
1297 + u64 last_reset_val; /* initial value of overflowed PMD */
1298 + unsigned long ip; /* where the overflow interrupt happened */
1299 + u64 tstamp; /* overflow timestamp */
1300 + u16 cpu; /* cpu on which the overflow occurred */
1301 + u16 set; /* event set active when overflow occurred */
1302 + pid_t tgid; /* thread group id (for NPTL, this is getpid()) */
1303 +};
1304
1305 -#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */
1306 -#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS))
1307 -#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE)
1308 +#define PFM_DEFAULT_MAX_PMDS 64 /* #pmds supported */
1309 +#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(struct pfm_default_smpl_entry)+\
1310 + (sizeof(u64)*PFM_DEFAULT_MAX_PMDS))
1311 +#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(struct pfm_default_smpl_hdr)+\
1312 + PFM_DEFAULT_MAX_ENTRY_SIZE)
1313
1314 #define PFM_DEFAULT_SMPL_VERSION_MAJ 2U
1315 -#define PFM_DEFAULT_SMPL_VERSION_MIN 0U
1316 -#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
1317 +#define PFM_DEFAULT_SMPL_VERSION_MIN 1U
1318 +#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|\
1319 + (PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
1320
1321 -#endif /* __PERFMON_DEFAULT_SMPL_H__ */
1322 +#endif /* __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ */
1323 diff --git a/arch/ia64/include/asm/perfmon_kern.h b/arch/ia64/include/asm/perfmon_kern.h
1324 new file mode 100644
1325 index 0000000..fb40459
1326 --- /dev/null
1327 +++ b/arch/ia64/include/asm/perfmon_kern.h
1328 @@ -0,0 +1,356 @@
1329 +/*
1330 + * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
1331 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
1332 + *
1333 + * This file contains Itanium Processor Family specific definitions
1334 + * for the perfmon interface.
1335 + *
1336 + * This program is free software; you can redistribute it and/or
1337 + * modify it under the terms of version 2 of the GNU General Public
1338 + * License as published by the Free Software Foundation.
1339 + *
1340 + * This program is distributed in the hope that it will be useful,
1341 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1342 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1343 + * General Public License for more details.
1344 + *
1345 + * You should have received a copy of the GNU General Public License
1346 + * along with this program; if not, write to the Free Software
1347 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1348 + * 02111-1307 USA
1349 + */
1350 +#ifndef _ASM_IA64_PERFMON_KERN_H_
1351 +#define _ASM_IA64_PERFMON_KERN_H_
1352 +
1353 +#ifdef __KERNEL__
1354 +
1355 +#ifdef CONFIG_PERFMON
1356 +#include <asm/unistd.h>
1357 +#include <asm/hw_irq.h>
1358 +
1359 +/*
1360 + * describe the content of the pfm_syst_info field
1361 + * layout:
1362 + * bits[00-15] : generic flags
1363 + * bits[16-31] : arch-specific flags
1364 + */
1365 +#define PFM_ITA_CPUINFO_IDLE_EXCL 0x10000 /* stop monitoring in idle loop */
1366 +
1367 +/*
1368 + * For some CPUs, the upper bits of a counter must be set in order for the
1369 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
1370 + * and the upper bits are cleared. This function may be used to set them back.
1371 + */
1372 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
1373 + unsigned int cnum)
1374 +{}
1375 +
1376 +/*
1377 + * called from __pfm_interrupt_handler(). ctx is not NULL.
1378 + * ctx is locked. PMU interrupt is masked.
1379 + *
1380 + * must stop all monitoring to ensure handler has consistent view.
1381 + * must collect overflowed PMDs bitmask into povfl_pmds and
1382 + * npend_ovfls. If no interrupt detected then npend_ovfls
1383 + * must be set to zero.
1384 + */
1385 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
1386 + struct pfm_event_set *set)
1387 +{
1388 + u64 tmp;
1389 +
1390 + /*
1391 + * do not overwrite existing value, must
1392 + * process those first (coming from context switch replay)
1393 + */
1394 + if (set->npend_ovfls)
1395 + return;
1396 +
1397 + ia64_srlz_d();
1398 +
1399 + tmp = ia64_get_pmc(0) & ~0xf;
1400 +
1401 + set->povfl_pmds[0] = tmp;
1402 +
1403 + set->npend_ovfls = ia64_popcnt(tmp);
1404 +}
1405 +
1406 +static inline int pfm_arch_init_pmu_config(void)
1407 +{
1408 + return 0;
1409 +}
1410 +
1411 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
1412 +{
1413 + ia64_resend_irq(IA64_PERFMON_VECTOR);
1414 +}
1415 +
1416 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
1417 + struct pfm_event_set *set)
1418 +{}
1419 +
1420 +static inline void pfm_arch_serialize(void)
1421 +{
1422 + ia64_srlz_d();
1423 +}
1424 +
1425 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
1426 +{
1427 + PFM_DBG_ovfl("state=%d", ctx->state);
1428 + ia64_set_pmc(0, 0);
1429 + /* no serialization */
1430 +}
1431 +
1432 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
1433 + unsigned int cnum, u64 value)
1434 +{
1435 + if (cnum < 256) {
1436 + ia64_set_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
1437 + } else if (cnum < 264) {
1438 + ia64_set_ibr(cnum-256, value);
1439 + ia64_dv_serialize_instruction();
1440 + } else {
1441 + ia64_set_dbr(cnum-264, value);
1442 + ia64_dv_serialize_instruction();
1443 + }
1444 +}
1445 +
1446 +/*
1447 + * On IA-64, for per-thread contexts which have the ITA_FL_INSECURE
1448 + * flag, it is possible to start/stop monitoring directly from user level
1449 + * without calling pfm_start()/pfm_stop(). This allows very lightweight
1450 + * control yet the kernel sometimes needs to know if monitoring is actually
1451 + * on or off.
1452 + *
1453 + * Tracking of this information is normally done by pfm_start/pfm_stop
1454 + * in flags.started. Here we need to compensate by checking actual
1455 + * psr bit.
1456 + */
1457 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
1458 +{
1459 + return ctx->flags.started
1460 + || ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_UP|IA64_PSR_PP);
1461 +}
1462 +
1463 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
1464 + unsigned int cnum, u64 value)
1465 +{
1466 + /*
1467 + * for a counting PMD, overflow bit must be cleared
1468 + */
1469 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
1470 + value &= pfm_pmu_conf->ovfl_mask;
1471 +
1472 + /*
1473 + * for counters, writes to upper bits are ignored, no need to mask
1474 + */
1475 + ia64_set_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
1476 +}
1477 +
1478 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
1479 +{
1480 + return ia64_get_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr);
1481 +}
1482 +
1483 +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
1484 +{
1485 + return ia64_get_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr);
1486 +}
1487 +
1488 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
1489 + struct pfm_context *ctx)
1490 +{
1491 + struct pt_regs *regs;
1492 +
1493 + regs = task_pt_regs(task);
1494 + ia64_psr(regs)->pp = 0;
1495 +}
1496 +
1497 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
1498 + struct pfm_context *ctx)
1499 +{
1500 + struct pt_regs *regs;
1501 +
1502 + if (!(ctx->active_set->flags & PFM_ITA_SETFL_INTR_ONLY)) {
1503 + regs = task_pt_regs(task);
1504 + ia64_psr(regs)->pp = 1;
1505 + }
1506 +}
1507 +
1508 +/*
1509 + * On IA-64, the PMDs are NOT saved by pfm_arch_intr_freeze_pmu()
1510 + * when entering the PMU interrupt handler, thus, we need
1511 + * to save them in pfm_switch_sets_from_intr()
1512 + */
1513 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
1514 + struct pfm_event_set *set)
1515 +{
1516 + pfm_save_pmds(ctx, set);
1517 +}
1518 +
1519 +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags);
1520 +
1521 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
1522 +{}
1523 +
1524 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
1525 +void pfm_arch_ctxswin_thread(struct task_struct *task,
1526 + struct pfm_context *ctx);
1527 +
1528 +void pfm_arch_unload_context(struct pfm_context *ctx);
1529 +int pfm_arch_load_context(struct pfm_context *ctx);
1530 +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags);
1531 +
1532 +void pfm_arch_mask_monitoring(struct pfm_context *ctx,
1533 + struct pfm_event_set *set);
1534 +void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
1535 + struct pfm_event_set *set);
1536 +
1537 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
1538 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
1539 +
1540 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
1541 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
1542 +
1543 +int pfm_arch_init(void);
1544 +void pfm_arch_init_percpu(void);
1545 +char *pfm_arch_get_pmu_module_name(void);
1546 +
1547 +int __pfm_use_dbregs(struct task_struct *task);
1548 +int __pfm_release_dbregs(struct task_struct *task);
1549 +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
1550 + struct pfm_event_set *set);
1551 +
1552 +void pfm_arch_show_session(struct seq_file *m);
1553 +
1554 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
1555 +{
1556 + return 0;
1557 +}
1558 +
1559 +static inline void pfm_arch_pmu_release(void)
1560 +{}
1561 +
1562 +/* not necessary on IA-64 */
1563 +static inline void pfm_cacheflush(void *addr, unsigned int len)
1564 +{}
1565 +
1566 +/*
1567 + * miscellaneous architected definitions
1568 + */
1569 +#define PFM_ITA_FCNTR 4 /* first counting monitor (PMC/PMD) */
1570 +
1571 +/*
1572 + * private event set flags (set_priv_flags)
1573 + */
1574 +#define PFM_ITA_SETFL_USE_DBR 0x1000000 /* set uses debug registers */
1575 +
1576 +
1577 +/*
1578 + * Itanium-specific data structures
1579 + */
1580 +struct pfm_ia64_context_flags {
1581 + unsigned int use_dbr:1; /* use range restrictions (debug registers) */
1582 + unsigned int insecure:1; /* insecure monitoring for non-self session */
1583 + unsigned int reserved:30;/* for future use */
1584 +};
1585 +
1586 +struct pfm_arch_context {
1587 + struct pfm_ia64_context_flags flags; /* arch specific ctx flags */
1588 + u64 ctx_saved_psr_up;/* storage for psr_up */
1589 +#ifdef CONFIG_IA64_PERFMON_COMPAT
1590 + void *ctx_smpl_vaddr; /* vaddr of user mapping */
1591 +#endif
1592 +};
1593 +
1594 +#ifdef CONFIG_IA64_PERFMON_COMPAT
1595 +ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
1596 + char __user *buf,
1597 + int non_block,
1598 + size_t size);
1599 +int pfm_ia64_compat_init(void);
1600 +int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
1601 + size_t rsize, struct file *filp);
1602 +#else
1603 +static inline ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
1604 + char __user *buf,
1605 + int non_block,
1606 + size_t size)
1607 +{
1608 + return -EINVAL;
1609 +}
1610 +
1611 +static inline int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
1612 + size_t rsize, struct file *filp)
1613 +{
1614 + return -EINVAL;
1615 +}
1616 +#endif
1617 +
1618 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
1619 +{
1620 + /*
1621 + * On IA-64, we ran out of bits in the bottom 7 bits of the
1622 + * threadinfo bitmask. Thus we used a 2-stage approach by piggybacking
1623 + * on NOTIFY_RESUME and then in do_notify_resume() we demultiplex and
1624 + * call pfm_handle_work() if needed
1625 + */
1626 + set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
1627 +}
1628 +
1629 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
1630 +{
1631 + /*
1632 + * we cannot just clear TIF_NOTIFY_RESUME because other TIF flags are
1633 + * piggybacked onto it: TIF_PERFMON_WORK, TIF_RESTORE_RSE
1634 + *
1635 + * The tsk_clear_notify_resume() checks if any of those are set before
1636 + * clearing the bit
1637 + */
1638 + tsk_clear_notify_resume(task);
1639 +}
1640 +
1641 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
1642 +{
1643 + return 0;
1644 +}
1645 +
1646 +extern struct pfm_ia64_pmu_info *pfm_ia64_pmu_info;
1647 +
1648 +#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
1649 +
1650 +/*
1651 + * IA-64 does not need extra alignment requirements for the sampling buffer
1652 + */
1653 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
1654 +
1655 +
1656 +static inline void pfm_release_dbregs(struct task_struct *task)
1657 +{
1658 + if (task->thread.flags & IA64_THREAD_DBG_VALID)
1659 + __pfm_release_dbregs(task);
1660 +}
1661 +
1662 +#define pfm_use_dbregs(_t) __pfm_use_dbregs(_t)
1663 +
1664 +static inline int pfm_arch_get_base_syscall(void)
1665 +{
1666 + return __NR_pfm_create_context;
1667 +}
1668 +
1669 +struct pfm_arch_pmu_info {
1670 + unsigned long mask_pmcs[PFM_PMC_BV]; /* PMCs to modify when masking */
1671 +};
1672 +
1673 +DECLARE_PER_CPU(u32, pfm_syst_info);
1674 +#else /* !CONFIG_PERFMON */
1675 +/*
1676 + * perfmon ia64-specific hooks
1677 + */
1678 +#define pfm_release_dbregs(_t) do { } while (0)
1679 +#define pfm_use_dbregs(_t) (0)
1680 +
1681 +#endif /* CONFIG_PERFMON */
1682 +
1683 +#endif /* __KERNEL__ */
1684 +#endif /* _ASM_IA64_PERFMON_KERN_H_ */
1685 diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
1686 index f88fa05..9d6af9c 100644
1687 --- a/arch/ia64/include/asm/processor.h
1688 +++ b/arch/ia64/include/asm/processor.h
1689 @@ -42,7 +42,6 @@
1690
1691 #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */
1692 #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */
1693 -#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
1694 #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */
1695 #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */
1696 #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration
1697 @@ -321,14 +320,6 @@ struct thread_struct {
1698 #else
1699 # define INIT_THREAD_IA32
1700 #endif /* CONFIG_IA32_SUPPORT */
1701 -#ifdef CONFIG_PERFMON
1702 - void *pfm_context; /* pointer to detailed PMU context */
1703 - unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */
1704 -# define INIT_THREAD_PM .pfm_context = NULL, \
1705 - .pfm_needs_checking = 0UL,
1706 -#else
1707 -# define INIT_THREAD_PM
1708 -#endif
1709 __u64 dbr[IA64_NUM_DBG_REGS];
1710 __u64 ibr[IA64_NUM_DBG_REGS];
1711 struct ia64_fpreg fph[96]; /* saved/loaded on demand */
1712 @@ -343,7 +334,6 @@ struct thread_struct {
1713 .task_size = DEFAULT_TASK_SIZE, \
1714 .last_fph_cpu = -1, \
1715 INIT_THREAD_IA32 \
1716 - INIT_THREAD_PM \
1717 .dbr = {0, }, \
1718 .ibr = {0, }, \
1719 .fph = {{{{0}}}, } \
1720 diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h
1721 index 927a381..ab5aeea 100644
1722 --- a/arch/ia64/include/asm/system.h
1723 +++ b/arch/ia64/include/asm/system.h
1724 @@ -217,6 +217,7 @@ struct task_struct;
1725 extern void ia64_save_extra (struct task_struct *task);
1726 extern void ia64_load_extra (struct task_struct *task);
1727
1728 +
1729 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
1730 extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
1731 # define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
1732 @@ -224,16 +225,9 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
1733 # define IA64_ACCOUNT_ON_SWITCH(p,n)
1734 #endif
1735
1736 -#ifdef CONFIG_PERFMON
1737 - DECLARE_PER_CPU(unsigned long, pfm_syst_info);
1738 -# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
1739 -#else
1740 -# define PERFMON_IS_SYSWIDE() (0)
1741 -#endif
1742 -
1743 -#define IA64_HAS_EXTRA_STATE(t) \
1744 - ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \
1745 - || IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE())
1746 +#define IA64_HAS_EXTRA_STATE(t) \
1747 + (((t)->thread.flags & IA64_THREAD_DBG_VALID) \
1748 + || IS_IA32_PROCESS(task_pt_regs(t)))
1749
1750 #define __switch_to(prev,next,last) do { \
1751 IA64_ACCOUNT_ON_SWITCH(prev, next); \
1752 @@ -241,6 +235,10 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
1753 ia64_save_extra(prev); \
1754 if (IA64_HAS_EXTRA_STATE(next)) \
1755 ia64_load_extra(next); \
1756 + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
1757 + pfm_ctxsw_out(prev, next); \
1758 + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
1759 + pfm_ctxsw_in(prev, next); \
1760 ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \
1761 (last) = ia64_switch_to((next)); \
1762 } while (0)
1763 diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
1764 index 7c60fcd..3355332 100644
1765 --- a/arch/ia64/include/asm/thread_info.h
1766 +++ b/arch/ia64/include/asm/thread_info.h
1767 @@ -110,6 +110,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk);
1768 #define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */
1769 #define TIF_FREEZE 20 /* is freezing for suspend */
1770 #define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */
1771 +#define TIF_PERFMON_CTXSW 22 /* perfmon needs ctxsw calls */
1772 +#define TIF_PERFMON_WORK 23 /* work for pfm_handle_work() */
1773
1774 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
1775 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
1776 @@ -123,6 +125,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk);
1777 #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED)
1778 #define _TIF_FREEZE (1 << TIF_FREEZE)
1779 #define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE)
1780 +#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW)
1781 +#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK)
1782
1783 /* "work to do on user-return" bits */
1784 #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
1785 diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
1786 index d535833..29a43bc 100644
1787 --- a/arch/ia64/include/asm/unistd.h
1788 +++ b/arch/ia64/include/asm/unistd.h
1789 @@ -308,11 +308,23 @@
1790 #define __NR_dup3 1316
1791 #define __NR_pipe2 1317
1792 #define __NR_inotify_init1 1318
1793 +#define __NR_pfm_create_context 1319
1794 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
1795 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
1796 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
1797 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
1798 +#define __NR_pfm_start (__NR_pfm_create_context+5)
1799 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
1800 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
1801 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
1802 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
1803 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
1804 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
1805
1806 #ifdef __KERNEL__
1807
1808
1809 -#define NR_syscalls 295 /* length of syscall table */
1810 +#define NR_syscalls 307 /* length of syscall table */
1811
1812 /*
1813 * The following defines stop scripts/checksyscalls.sh from complaining about
1814 diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
1815 index 87fea11..b5ac54c 100644
1816 --- a/arch/ia64/kernel/Makefile
1817 +++ b/arch/ia64/kernel/Makefile
1818 @@ -5,7 +5,7 @@
1819 extra-y := head.o init_task.o vmlinux.lds
1820
1821 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
1822 - irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
1823 + irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o ptrace.o sal.o \
1824 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
1825 unwind.o mca.o mca_asm.o topology.o
1826
1827 @@ -23,7 +23,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o
1828 obj-$(CONFIG_MODULES) += module.o
1829 obj-$(CONFIG_SMP) += smp.o smpboot.o
1830 obj-$(CONFIG_NUMA) += numa.o
1831 -obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
1832 obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
1833 obj-$(CONFIG_CPU_FREQ) += cpufreq/
1834 obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
1835 diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
1836 index 0dd6c14..f1c3e41 100644
1837 --- a/arch/ia64/kernel/entry.S
1838 +++ b/arch/ia64/kernel/entry.S
1839 @@ -1697,6 +1697,18 @@ sys_call_table:
1840 data8 sys_dup3
1841 data8 sys_pipe2
1842 data8 sys_inotify_init1
1843 + data8 sys_pfm_create_context
1844 + data8 sys_pfm_write_pmcs // 1320
1845 + data8 sys_pfm_write_pmds
1846 + data8 sys_pfm_read_pmds
1847 + data8 sys_pfm_load_context
1848 + data8 sys_pfm_start
1849 + data8 sys_pfm_stop // 1325
1850 + data8 sys_pfm_restart
1851 + data8 sys_pfm_create_evtsets
1852 + data8 sys_pfm_getinfo_evtsets
1853 + data8 sys_pfm_delete_evtsets
1854 + data8 sys_pfm_unload_context // 1330
1855
1856 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
1857 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
1858 diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
1859 index 28d3d48..ede8024 100644
1860 --- a/arch/ia64/kernel/irq_ia64.c
1861 +++ b/arch/ia64/kernel/irq_ia64.c
1862 @@ -40,10 +40,6 @@
1863 #include <asm/system.h>
1864 #include <asm/tlbflush.h>
1865
1866 -#ifdef CONFIG_PERFMON
1867 -# include <asm/perfmon.h>
1868 -#endif
1869 -
1870 #define IRQ_DEBUG 0
1871
1872 #define IRQ_VECTOR_UNASSIGNED (0)
1873 @@ -660,9 +656,6 @@ init_IRQ (void)
1874 }
1875 #endif
1876 #endif
1877 -#ifdef CONFIG_PERFMON
1878 - pfm_init_percpu();
1879 -#endif
1880 platform_irq_init();
1881 }
1882
1883 diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
1884 deleted file mode 100644
1885 index 5f637bb..0000000
1886 --- a/arch/ia64/kernel/perfmon_default_smpl.c
1887 +++ /dev/null
1888 @@ -1,296 +0,0 @@
1889 -/*
1890 - * Copyright (C) 2002-2003 Hewlett-Packard Co
1891 - * Stephane Eranian <eranian@hpl.hp.com>
1892 - *
1893 - * This file implements the default sampling buffer format
1894 - * for the Linux/ia64 perfmon-2 subsystem.
1895 - */
1896 -#include <linux/kernel.h>
1897 -#include <linux/types.h>
1898 -#include <linux/module.h>
1899 -#include <linux/init.h>
1900 -#include <asm/delay.h>
1901 -#include <linux/smp.h>
1902 -
1903 -#include <asm/perfmon.h>
1904 -#include <asm/perfmon_default_smpl.h>
1905 -
1906 -MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
1907 -MODULE_DESCRIPTION("perfmon default sampling format");
1908 -MODULE_LICENSE("GPL");
1909 -
1910 -#define DEFAULT_DEBUG 1
1911 -
1912 -#ifdef DEFAULT_DEBUG
1913 -#define DPRINT(a) \
1914 - do { \
1915 - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
1916 - } while (0)
1917 -
1918 -#define DPRINT_ovfl(a) \
1919 - do { \
1920 - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
1921 - } while (0)
1922 -
1923 -#else
1924 -#define DPRINT(a)
1925 -#define DPRINT_ovfl(a)
1926 -#endif
1927 -
1928 -static int
1929 -default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
1930 -{
1931 - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
1932 - int ret = 0;
1933 -
1934 - if (data == NULL) {
1935 - DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
1936 - return -EINVAL;
1937 - }
1938 -
1939 - DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
1940 -
1941 - /*
1942 - * must hold at least the buffer header + one minimally sized entry
1943 - */
1944 - if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
1945 -
1946 - DPRINT(("buf_size=%lu\n", arg->buf_size));
1947 -
1948 - return ret;
1949 -}
1950 -
1951 -static int
1952 -default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
1953 -{
1954 - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
1955 -
1956 - /*
1957 - * size has been validated in default_validate
1958 - */
1959 - *size = arg->buf_size;
1960 -
1961 - return 0;
1962 -}
1963 -
1964 -static int
1965 -default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
1966 -{
1967 - pfm_default_smpl_hdr_t *hdr;
1968 - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
1969 -
1970 - hdr = (pfm_default_smpl_hdr_t *)buf;
1971 -
1972 - hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
1973 - hdr->hdr_buf_size = arg->buf_size;
1974 - hdr->hdr_cur_offs = sizeof(*hdr);
1975 - hdr->hdr_overflows = 0UL;
1976 - hdr->hdr_count = 0UL;
1977 -
1978 - DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
1979 - task_pid_nr(task),
1980 - buf,
1981 - hdr->hdr_buf_size,
1982 - sizeof(*hdr),
1983 - hdr->hdr_version,
1984 - hdr->hdr_cur_offs));
1985 -
1986 - return 0;
1987 -}
1988 -
1989 -static int
1990 -default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
1991 -{
1992 - pfm_default_smpl_hdr_t *hdr;
1993 - pfm_default_smpl_entry_t *ent;
1994 - void *cur, *last;
1995 - unsigned long *e, entry_size;
1996 - unsigned int npmds, i;
1997 - unsigned char ovfl_pmd;
1998 - unsigned char ovfl_notify;
1999 -
2000 - if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
2001 - DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
2002 - return -EINVAL;
2003 - }
2004 -
2005 - hdr = (pfm_default_smpl_hdr_t *)buf;
2006 - cur = buf+hdr->hdr_cur_offs;
2007 - last = buf+hdr->hdr_buf_size;
2008 - ovfl_pmd = arg->ovfl_pmd;
2009 - ovfl_notify = arg->ovfl_notify;
2010 -
2011 - /*
2012 - * precheck for sanity
2013 - */
2014 - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
2015 -
2016 - npmds = hweight64(arg->smpl_pmds[0]);
2017 -
2018 - ent = (pfm_default_smpl_entry_t *)cur;
2019 -
2020 - prefetch(arg->smpl_pmds_values);
2021 -
2022 - entry_size = sizeof(*ent) + (npmds << 3);
2023 -
2024 - /* position for first pmd */
2025 - e = (unsigned long *)(ent+1);
2026 -
2027 - hdr->hdr_count++;
2028 -
2029 - DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
2030 - task->pid,
2031 - hdr->hdr_count,
2032 - cur, last,
2033 - last-cur,
2034 - ovfl_pmd,
2035 - ovfl_notify, npmds));
2036 -
2037 - /*
2038 - * current = task running at the time of the overflow.
2039 - *
2040 - * per-task mode:
2041 - * - this is ususally the task being monitored.
2042 - * Under certain conditions, it might be a different task
2043 - *
2044 - * system-wide:
2045 - * - this is not necessarily the task controlling the session
2046 - */
2047 - ent->pid = current->pid;
2048 - ent->ovfl_pmd = ovfl_pmd;
2049 - ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
2050 -
2051 - /*
2052 - * where did the fault happen (includes slot number)
2053 - */
2054 - ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
2055 -
2056 - ent->tstamp = stamp;
2057 - ent->cpu = smp_processor_id();
2058 - ent->set = arg->active_set;
2059 - ent->tgid = current->tgid;
2060 -
2061 - /*
2062 - * selectively store PMDs in increasing index number
2063 - */
2064 - if (npmds) {
2065 - unsigned long *val = arg->smpl_pmds_values;
2066 - for(i=0; i < npmds; i++) {
2067 - *e++ = *val++;
2068 - }
2069 - }
2070 -
2071 - /*
2072 - * update position for next entry
2073 - */
2074 - hdr->hdr_cur_offs += entry_size;
2075 - cur += entry_size;
2076 -
2077 - /*
2078 - * post check to avoid losing the last sample
2079 - */
2080 - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
2081 -
2082 - /*
2083 - * keep same ovfl_pmds, ovfl_notify
2084 - */
2085 - arg->ovfl_ctrl.bits.notify_user = 0;
2086 - arg->ovfl_ctrl.bits.block_task = 0;
2087 - arg->ovfl_ctrl.bits.mask_monitoring = 0;
2088 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
2089 -
2090 - return 0;
2091 -full:
2092 - DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
2093 -
2094 - /*
2095 - * increment number of buffer overflow.
2096 - * important to detect duplicate set of samples.
2097 - */
2098 - hdr->hdr_overflows++;
2099 -
2100 - /*
2101 - * if no notification requested, then we saturate the buffer
2102 - */
2103 - if (ovfl_notify == 0) {
2104 - arg->ovfl_ctrl.bits.notify_user = 0;
2105 - arg->ovfl_ctrl.bits.block_task = 0;
2106 - arg->ovfl_ctrl.bits.mask_monitoring = 1;
2107 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
2108 - } else {
2109 - arg->ovfl_ctrl.bits.notify_user = 1;
2110 - arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
2111 - arg->ovfl_ctrl.bits.mask_monitoring = 1;
2112 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
2113 - }
2114 - return -1; /* we are full, sorry */
2115 -}
2116 -
2117 -static int
2118 -default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
2119 -{
2120 - pfm_default_smpl_hdr_t *hdr;
2121 -
2122 - hdr = (pfm_default_smpl_hdr_t *)buf;
2123 -
2124 - hdr->hdr_count = 0UL;
2125 - hdr->hdr_cur_offs = sizeof(*hdr);
2126 -
2127 - ctrl->bits.mask_monitoring = 0;
2128 - ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
2129 -
2130 - return 0;
2131 -}
2132 -
2133 -static int
2134 -default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
2135 -{
2136 - DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
2137 - return 0;
2138 -}
2139 -
2140 -static pfm_buffer_fmt_t default_fmt={
2141 - .fmt_name = "default_format",
2142 - .fmt_uuid = PFM_DEFAULT_SMPL_UUID,
2143 - .fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
2144 - .fmt_validate = default_validate,
2145 - .fmt_getsize = default_get_size,
2146 - .fmt_init = default_init,
2147 - .fmt_handler = default_handler,
2148 - .fmt_restart = default_restart,
2149 - .fmt_restart_active = default_restart,
2150 - .fmt_exit = default_exit,
2151 -};
2152 -
2153 -static int __init
2154 -pfm_default_smpl_init_module(void)
2155 -{
2156 - int ret;
2157 -
2158 - ret = pfm_register_buffer_fmt(&default_fmt);
2159 - if (ret == 0) {
2160 - printk("perfmon_default_smpl: %s v%u.%u registered\n",
2161 - default_fmt.fmt_name,
2162 - PFM_DEFAULT_SMPL_VERSION_MAJ,
2163 - PFM_DEFAULT_SMPL_VERSION_MIN);
2164 - } else {
2165 - printk("perfmon_default_smpl: %s cannot register ret=%d\n",
2166 - default_fmt.fmt_name,
2167 - ret);
2168 - }
2169 -
2170 - return ret;
2171 -}
2172 -
2173 -static void __exit
2174 -pfm_default_smpl_cleanup_module(void)
2175 -{
2176 - int ret;
2177 - ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
2178 -
2179 - printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
2180 -}
2181 -
2182 -module_init(pfm_default_smpl_init_module);
2183 -module_exit(pfm_default_smpl_cleanup_module);
2184 -
2185 diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h
2186 deleted file mode 100644
2187 index 6748947..0000000
2188 --- a/arch/ia64/kernel/perfmon_generic.h
2189 +++ /dev/null
2190 @@ -1,45 +0,0 @@
2191 -/*
2192 - * This file contains the generic PMU register description tables
2193 - * and pmc checker used by perfmon.c.
2194 - *
2195 - * Copyright (C) 2002-2003 Hewlett Packard Co
2196 - * Stephane Eranian <eranian@hpl.hp.com>
2197 - */
2198 -
2199 -static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
2200 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2201 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2202 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2203 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2204 -/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2205 -/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2206 -/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2207 -/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2208 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2209 -};
2210 -
2211 -static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
2212 -/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2213 -/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2214 -/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2215 -/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
2216 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
2217 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
2218 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
2219 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
2220 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2221 -};
2222 -
2223 -/*
2224 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2225 - */
2226 -static pmu_config_t pmu_conf_gen={
2227 - .pmu_name = "Generic",
2228 - .pmu_family = 0xff, /* any */
2229 - .ovfl_val = (1UL << 32) - 1,
2230 - .num_ibrs = 0, /* does not use */
2231 - .num_dbrs = 0, /* does not use */
2232 - .pmd_desc = pfm_gen_pmd_desc,
2233 - .pmc_desc = pfm_gen_pmc_desc
2234 -};
2235 -
2236 diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h
2237 deleted file mode 100644
2238 index d1d508a..0000000
2239 --- a/arch/ia64/kernel/perfmon_itanium.h
2240 +++ /dev/null
2241 @@ -1,115 +0,0 @@
2242 -/*
2243 - * This file contains the Itanium PMU register description tables
2244 - * and pmc checker used by perfmon.c.
2245 - *
2246 - * Copyright (C) 2002-2003 Hewlett Packard Co
2247 - * Stephane Eranian <eranian@hpl.hp.com>
2248 - */
2249 -static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
2250 -
2251 -static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
2252 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2253 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2254 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2255 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2256 -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2257 -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2258 -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2259 -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2260 -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2261 -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2262 -/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2263 -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2264 -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2265 -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2266 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2267 -};
2268 -
2269 -static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
2270 -/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2271 -/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2272 -/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2273 -/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2274 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
2275 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
2276 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
2277 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
2278 -/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2279 -/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2280 -/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2281 -/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2282 -/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2283 -/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2284 -/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2285 -/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2286 -/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2287 -/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2288 - { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2289 -};
2290 -
2291 -static int
2292 -pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2293 -{
2294 - int ret;
2295 - int is_loaded;
2296 -
2297 - /* sanitfy check */
2298 - if (ctx == NULL) return -EINVAL;
2299 -
2300 - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
2301 -
2302 - /*
2303 - * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
2304 - * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
2305 - */
2306 - if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
2307 -
2308 - DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
2309 -
2310 - /* don't mix debug with perfmon */
2311 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2312 -
2313 - /*
2314 - * a count of 0 will mark the debug registers as in use and also
2315 - * ensure that they are properly cleared.
2316 - */
2317 - ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
2318 - if (ret) return ret;
2319 - }
2320 -
2321 - /*
2322 - * we must clear the (data) debug registers if pmc11.pt bit is cleared
2323 - * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
2324 - */
2325 - if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
2326 -
2327 - DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
2328 -
2329 - /* don't mix debug with perfmon */
2330 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2331 -
2332 - /*
2333 - * a count of 0 will mark the debug registers as in use and also
2334 - * ensure that they are properly cleared.
2335 - */
2336 - ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
2337 - if (ret) return ret;
2338 - }
2339 - return 0;
2340 -}
2341 -
2342 -/*
2343 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2344 - */
2345 -static pmu_config_t pmu_conf_ita={
2346 - .pmu_name = "Itanium",
2347 - .pmu_family = 0x7,
2348 - .ovfl_val = (1UL << 32) - 1,
2349 - .pmd_desc = pfm_ita_pmd_desc,
2350 - .pmc_desc = pfm_ita_pmc_desc,
2351 - .num_ibrs = 8,
2352 - .num_dbrs = 8,
2353 - .use_rr_dbregs = 1, /* debug register are use for range retrictions */
2354 -};
2355 -
2356 -
2357 diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
2358 deleted file mode 100644
2359 index c4bec7a..0000000
2360 --- a/arch/ia64/kernel/perfmon_mckinley.h
2361 +++ /dev/null
2362 @@ -1,187 +0,0 @@
2363 -/*
2364 - * This file contains the McKinley PMU register description tables
2365 - * and pmc checker used by perfmon.c.
2366 - *
2367 - * Copyright (C) 2002-2003 Hewlett Packard Co
2368 - * Stephane Eranian <eranian@hpl.hp.com>
2369 - */
2370 -static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
2371 -
2372 -static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
2373 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2374 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2375 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2376 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2377 -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2378 -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2379 -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2380 -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2381 -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2382 -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2383 -/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2384 -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2385 -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2386 -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2387 -/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2388 -/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
2389 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2390 -};
2391 -
2392 -static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
2393 -/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2394 -/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
2395 -/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2396 -/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2397 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
2398 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
2399 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
2400 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
2401 -/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2402 -/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2403 -/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2404 -/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2405 -/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2406 -/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2407 -/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2408 -/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2409 -/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
2410 -/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
2411 - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
2412 -};
2413 -
2414 -/*
2415 - * PMC reserved fields must have their power-up values preserved
2416 - */
2417 -static int
2418 -pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2419 -{
2420 - unsigned long tmp1, tmp2, ival = *val;
2421 -
2422 - /* remove reserved areas from user value */
2423 - tmp1 = ival & PMC_RSVD_MASK(cnum);
2424 -
2425 - /* get reserved fields values */
2426 - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
2427 -
2428 - *val = tmp1 | tmp2;
2429 -
2430 - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
2431 - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
2432 - return 0;
2433 -}
2434 -
2435 -/*
2436 - * task can be NULL if the context is unloaded
2437 - */
2438 -static int
2439 -pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2440 -{
2441 - int ret = 0, check_case1 = 0;
2442 - unsigned long val8 = 0, val14 = 0, val13 = 0;
2443 - int is_loaded;
2444 -
2445 - /* first preserve the reserved fields */
2446 - pfm_mck_reserved(cnum, val, regs);
2447 -
2448 - /* sanitfy check */
2449 - if (ctx == NULL) return -EINVAL;
2450 -
2451 - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
2452 -
2453 - /*
2454 - * we must clear the debug registers if pmc13 has a value which enable
2455 - * memory pipeline event constraints. In this case we need to clear the
2456 - * the debug registers if they have not yet been accessed. This is required
2457 - * to avoid picking stale state.
2458 - * PMC13 is "active" if:
2459 - * one of the pmc13.cfg_dbrpXX field is different from 0x3
2460 - * AND
2461 - * at the corresponding pmc13.ena_dbrpXX is set.
2462 - */
2463 - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
2464 -
2465 - if (cnum == 13 && is_loaded
2466 - && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
2467 -
2468 - DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
2469 -
2470 - /* don't mix debug with perfmon */
2471 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2472 -
2473 - /*
2474 - * a count of 0 will mark the debug registers as in use and also
2475 - * ensure that they are properly cleared.
2476 - */
2477 - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
2478 - if (ret) return ret;
2479 - }
2480 - /*
2481 - * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
2482 - * before they are (fl_using_dbreg==0) to avoid picking up stale information.
2483 - */
2484 - if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
2485 -
2486 - DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
2487 -
2488 - /* don't mix debug with perfmon */
2489 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2490 -
2491 - /*
2492 - * a count of 0 will mark the debug registers as in use and also
2493 - * ensure that they are properly cleared.
2494 - */
2495 - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
2496 - if (ret) return ret;
2497 -
2498 - }
2499 -
2500 - switch(cnum) {
2501 - case 4: *val |= 1UL << 23; /* force power enable bit */
2502 - break;
2503 - case 8: val8 = *val;
2504 - val13 = ctx->ctx_pmcs[13];
2505 - val14 = ctx->ctx_pmcs[14];
2506 - check_case1 = 1;
2507 - break;
2508 - case 13: val8 = ctx->ctx_pmcs[8];
2509 - val13 = *val;
2510 - val14 = ctx->ctx_pmcs[14];
2511 - check_case1 = 1;
2512 - break;
2513 - case 14: val8 = ctx->ctx_pmcs[8];
2514 - val13 = ctx->ctx_pmcs[13];
2515 - val14 = *val;
2516 - check_case1 = 1;
2517 - break;
2518 - }
2519 - /* check illegal configuration which can produce inconsistencies in tagging
2520 - * i-side events in L1D and L2 caches
2521 - */
2522 - if (check_case1) {
2523 - ret = ((val13 >> 45) & 0xf) == 0
2524 - && ((val8 & 0x1) == 0)
2525 - && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
2526 - ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
2527 -
2528 - if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
2529 - }
2530 -
2531 - return ret ? -EINVAL : 0;
2532 -}
2533 -
2534 -/*
2535 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2536 - */
2537 -static pmu_config_t pmu_conf_mck={
2538 - .pmu_name = "Itanium 2",
2539 - .pmu_family = 0x1f,
2540 - .flags = PFM_PMU_IRQ_RESEND,
2541 - .ovfl_val = (1UL << 47) - 1,
2542 - .pmd_desc = pfm_mck_pmd_desc,
2543 - .pmc_desc = pfm_mck_pmc_desc,
2544 - .num_ibrs = 8,
2545 - .num_dbrs = 8,
2546 - .use_rr_dbregs = 1 /* debug register are use for range restrictions */
2547 -};
2548 -
2549 -
2550 diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h
2551 deleted file mode 100644
2552 index 7f8da4c..0000000
2553 --- a/arch/ia64/kernel/perfmon_montecito.h
2554 +++ /dev/null
2555 @@ -1,269 +0,0 @@
2556 -/*
2557 - * This file contains the Montecito PMU register description tables
2558 - * and pmc checker used by perfmon.c.
2559 - *
2560 - * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
2561 - * Contributed by Stephane Eranian <eranian@hpl.hp.com>
2562 - */
2563 -static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
2564 -
2565 -#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
2566 - RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
2567 -#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36))
2568 -#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35))
2569 -
2570 -static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
2571 -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2572 -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2573 -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2574 -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
2575 -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
2576 -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
2577 -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
2578 -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
2579 -/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
2580 -/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
2581 -/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
2582 -/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
2583 -/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
2584 -/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
2585 -/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
2586 -/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
2587 -/* pmc16 */ { PFM_REG_NOTIMPL, },
2588 -/* pmc17 */ { PFM_REG_NOTIMPL, },
2589 -/* pmc18 */ { PFM_REG_NOTIMPL, },
2590 -/* pmc19 */ { PFM_REG_NOTIMPL, },
2591 -/* pmc20 */ { PFM_REG_NOTIMPL, },
2592 -/* pmc21 */ { PFM_REG_NOTIMPL, },
2593 -/* pmc22 */ { PFM_REG_NOTIMPL, },
2594 -/* pmc23 */ { PFM_REG_NOTIMPL, },
2595 -/* pmc24 */ { PFM_REG_NOTIMPL, },
2596 -/* pmc25 */ { PFM_REG_NOTIMPL, },
2597 -/* pmc26 */ { PFM_REG_NOTIMPL, },
2598 -/* pmc27 */ { PFM_REG_NOTIMPL, },
2599 -/* pmc28 */ { PFM_REG_NOTIMPL, },
2600 -/* pmc29 */ { PFM_REG_NOTIMPL, },
2601 -/* pmc30 */ { PFM_REG_NOTIMPL, },
2602 -/* pmc31 */ { PFM_REG_NOTIMPL, },
2603 -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2604 -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2605 -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2606 -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2607 -/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2608 -/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
2609 -/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2610 -/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
2611 -/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
2612 -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
2613 -/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
2614 - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
2615 -};
2616 -
2617 -static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
2618 -/* pmd0 */ { PFM_REG_NOTIMPL, },
2619 -/* pmd1 */ { PFM_REG_NOTIMPL, },
2620 -/* pmd2 */ { PFM_REG_NOTIMPL, },
2621 -/* pmd3 */ { PFM_REG_NOTIMPL, },
2622 -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
2623 -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
2624 -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
2625 -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
2626 -/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}},
2627 -/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
2628 -/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
2629 -/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
2630 -/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
2631 -/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
2632 -/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
2633 -/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
2634 -/* pmd16 */ { PFM_REG_NOTIMPL, },
2635 -/* pmd17 */ { PFM_REG_NOTIMPL, },
2636 -/* pmd18 */ { PFM_REG_NOTIMPL, },
2637 -/* pmd19 */ { PFM_REG_NOTIMPL, },
2638 -/* pmd20 */ { PFM_REG_NOTIMPL, },
2639 -/* pmd21 */ { PFM_REG_NOTIMPL, },
2640 -/* pmd22 */ { PFM_REG_NOTIMPL, },
2641 -/* pmd23 */ { PFM_REG_NOTIMPL, },
2642 -/* pmd24 */ { PFM_REG_NOTIMPL, },
2643 -/* pmd25 */ { PFM_REG_NOTIMPL, },
2644 -/* pmd26 */ { PFM_REG_NOTIMPL, },
2645 -/* pmd27 */ { PFM_REG_NOTIMPL, },
2646 -/* pmd28 */ { PFM_REG_NOTIMPL, },
2647 -/* pmd29 */ { PFM_REG_NOTIMPL, },
2648 -/* pmd30 */ { PFM_REG_NOTIMPL, },
2649 -/* pmd31 */ { PFM_REG_NOTIMPL, },
2650 -/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
2651 -/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
2652 -/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
2653 -/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
2654 -/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
2655 -/* pmd37 */ { PFM_REG_NOTIMPL, },
2656 -/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2657 -/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2658 -/* pmd40 */ { PFM_REG_NOTIMPL, },
2659 -/* pmd41 */ { PFM_REG_NOTIMPL, },
2660 -/* pmd42 */ { PFM_REG_NOTIMPL, },
2661 -/* pmd43 */ { PFM_REG_NOTIMPL, },
2662 -/* pmd44 */ { PFM_REG_NOTIMPL, },
2663 -/* pmd45 */ { PFM_REG_NOTIMPL, },
2664 -/* pmd46 */ { PFM_REG_NOTIMPL, },
2665 -/* pmd47 */ { PFM_REG_NOTIMPL, },
2666 -/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2667 -/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2668 -/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2669 -/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2670 -/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2671 -/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2672 -/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2673 -/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2674 -/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2675 -/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2676 -/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2677 -/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2678 -/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2679 -/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2680 -/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2681 -/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
2682 - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
2683 -};
2684 -
2685 -/*
2686 - * PMC reserved fields must have their power-up values preserved
2687 - */
2688 -static int
2689 -pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2690 -{
2691 - unsigned long tmp1, tmp2, ival = *val;
2692 -
2693 - /* remove reserved areas from user value */
2694 - tmp1 = ival & PMC_RSVD_MASK(cnum);
2695 -
2696 - /* get reserved fields values */
2697 - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
2698 -
2699 - *val = tmp1 | tmp2;
2700 -
2701 - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
2702 - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
2703 - return 0;
2704 -}
2705 -
2706 -/*
2707 - * task can be NULL if the context is unloaded
2708 - */
2709 -static int
2710 -pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
2711 -{
2712 - int ret = 0;
2713 - unsigned long val32 = 0, val38 = 0, val41 = 0;
2714 - unsigned long tmpval;
2715 - int check_case1 = 0;
2716 - int is_loaded;
2717 -
2718 - /* first preserve the reserved fields */
2719 - pfm_mont_reserved(cnum, val, regs);
2720 -
2721 - tmpval = *val;
2722 -
2723 - /* sanity check */
2724 - if (ctx == NULL) return -EINVAL;
2725 -
2726 - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
2727 -
2728 - /*
2729 - * we must clear the debug registers if pmc41 has a value which enable
2730 - * memory pipeline event constraints. In this case we need to clear the
2731 - * the debug registers if they have not yet been accessed. This is required
2732 - * to avoid picking stale state.
2733 - * PMC41 is "active" if:
2734 - * one of the pmc41.cfg_dtagXX field is different from 0x3
2735 - * AND
2736 - * at the corresponding pmc41.en_dbrpXX is set.
2737 - * AND
2738 - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
2739 - */
2740 - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
2741 -
2742 - if (cnum == 41 && is_loaded
2743 - && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
2744 -
2745 - DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
2746 -
2747 - /* don't mix debug with perfmon */
2748 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2749 -
2750 - /*
2751 - * a count of 0 will mark the debug registers if:
2752 - * AND
2753 - */
2754 - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
2755 - if (ret) return ret;
2756 - }
2757 - /*
2758 - * we must clear the (instruction) debug registers if:
2759 - * pmc38.ig_ibrpX is 0 (enabled)
2760 - * AND
2761 - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
2762 - */
2763 - if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
2764 -
2765 - DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
2766 -
2767 - /* don't mix debug with perfmon */
2768 - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
2769 -
2770 - /*
2771 - * a count of 0 will mark the debug registers as in use and also
2772 - * ensure that they are properly cleared.
2773 - */
2774 - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
2775 - if (ret) return ret;
2776 -
2777 - }
2778 - switch(cnum) {
2779 - case 32: val32 = *val;
2780 - val38 = ctx->ctx_pmcs[38];
2781 - val41 = ctx->ctx_pmcs[41];
2782 - check_case1 = 1;
2783 - break;
2784 - case 38: val38 = *val;
2785 - val32 = ctx->ctx_pmcs[32];
2786 - val41 = ctx->ctx_pmcs[41];
2787 - check_case1 = 1;
2788 - break;
2789 - case 41: val41 = *val;
2790 - val32 = ctx->ctx_pmcs[32];
2791 - val38 = ctx->ctx_pmcs[38];
2792 - check_case1 = 1;
2793 - break;
2794 - }
2795 - /* check illegal configuration which can produce inconsistencies in tagging
2796 - * i-side events in L1D and L2 caches
2797 - */
2798 - if (check_case1) {
2799 - ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
2800 - && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
2801 - || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
2802 - if (ret) {
2803 - DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
2804 - return -EINVAL;
2805 - }
2806 - }
2807 - *val = tmpval;
2808 - return 0;
2809 -}
2810 -
2811 -/*
2812 - * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
2813 - */
2814 -static pmu_config_t pmu_conf_mont={
2815 - .pmu_name = "Montecito",
2816 - .pmu_family = 0x20,
2817 - .flags = PFM_PMU_IRQ_RESEND,
2818 - .ovfl_val = (1UL << 47) - 1,
2819 - .pmd_desc = pfm_mont_pmd_desc,
2820 - .pmc_desc = pfm_mont_pmc_desc,
2821 - .num_ibrs = 8,
2822 - .num_dbrs = 8,
2823 - .use_rr_dbregs = 1 /* debug register are use for range retrictions */
2824 -};
2825 diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
2826 index 3ab8373..a7dfb39 100644
2827 --- a/arch/ia64/kernel/process.c
2828 +++ b/arch/ia64/kernel/process.c
2829 @@ -28,6 +28,7 @@
2830 #include <linux/delay.h>
2831 #include <linux/kdebug.h>
2832 #include <linux/utsname.h>
2833 +#include <linux/perfmon_kern.h>
2834
2835 #include <asm/cpu.h>
2836 #include <asm/delay.h>
2837 @@ -45,10 +46,6 @@
2838
2839 #include "entry.h"
2840
2841 -#ifdef CONFIG_PERFMON
2842 -# include <asm/perfmon.h>
2843 -#endif
2844 -
2845 #include "sigframe.h"
2846
2847 void (*ia64_mark_idle)(int);
2848 @@ -162,10 +159,8 @@ show_regs (struct pt_regs *regs)
2849
2850 void tsk_clear_notify_resume(struct task_struct *tsk)
2851 {
2852 -#ifdef CONFIG_PERFMON
2853 - if (tsk->thread.pfm_needs_checking)
2854 + if (test_ti_thread_flag(task_thread_info(tsk), TIF_PERFMON_WORK))
2855 return;
2856 -#endif
2857 if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
2858 return;
2859 clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
2860 @@ -188,14 +183,9 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
2861 return;
2862 }
2863
2864 -#ifdef CONFIG_PERFMON
2865 - if (current->thread.pfm_needs_checking)
2866 - /*
2867 - * Note: pfm_handle_work() allow us to call it with interrupts
2868 - * disabled, and may enable interrupts within the function.
2869 - */
2870 - pfm_handle_work();
2871 -#endif
2872 + /* process perfmon asynchronous work (e.g. block thread or reset) */
2873 + if (test_thread_flag(TIF_PERFMON_WORK))
2874 + pfm_handle_work(task_pt_regs(current));
2875
2876 /* deal with pending signal delivery */
2877 if (test_thread_flag(TIF_SIGPENDING)) {
2878 @@ -212,22 +202,15 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
2879 local_irq_disable(); /* force interrupt disable */
2880 }
2881
2882 -static int pal_halt = 1;
2883 static int can_do_pal_halt = 1;
2884
2885 static int __init nohalt_setup(char * str)
2886 {
2887 - pal_halt = can_do_pal_halt = 0;
2888 + can_do_pal_halt = 0;
2889 return 1;
2890 }
2891 __setup("nohalt", nohalt_setup);
2892
2893 -void
2894 -update_pal_halt_status(int status)
2895 -{
2896 - can_do_pal_halt = pal_halt && status;
2897 -}
2898 -
2899 /*
2900 * We use this if we don't have any better idle routine..
2901 */
2902 @@ -236,6 +219,22 @@ default_idle (void)
2903 {
2904 local_irq_enable();
2905 while (!need_resched()) {
2906 +#ifdef CONFIG_PERFMON
2907 + u64 psr = 0;
2908 + /*
2909 + * If requested, we stop the PMU to avoid
2910 + * measuring across the core idle loop.
2911 + *
2912 + * dcr.pp is not modified on purpose
2913 + * it is used when coming out of
2914 + * safe_halt() via interrupt
2915 + */
2916 + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) {
2917 + psr = ia64_getreg(_IA64_REG_PSR);
2918 + if (psr & IA64_PSR_PP)
2919 + ia64_rsm(IA64_PSR_PP);
2920 + }
2921 +#endif
2922 if (can_do_pal_halt) {
2923 local_irq_disable();
2924 if (!need_resched()) {
2925 @@ -244,6 +243,12 @@ default_idle (void)
2926 local_irq_enable();
2927 } else
2928 cpu_relax();
2929 +#ifdef CONFIG_PERFMON
2930 + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) {
2931 + if (psr & IA64_PSR_PP)
2932 + ia64_ssm(IA64_PSR_PP);
2933 + }
2934 +#endif
2935 }
2936 }
2937
2938 @@ -344,22 +349,9 @@ cpu_idle (void)
2939 void
2940 ia64_save_extra (struct task_struct *task)
2941 {
2942 -#ifdef CONFIG_PERFMON
2943 - unsigned long info;
2944 -#endif
2945 -
2946 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
2947 ia64_save_debug_regs(&task->thread.dbr[0]);
2948
2949 -#ifdef CONFIG_PERFMON
2950 - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
2951 - pfm_save_regs(task);
2952 -
2953 - info = __get_cpu_var(pfm_syst_info);
2954 - if (info & PFM_CPUINFO_SYST_WIDE)
2955 - pfm_syst_wide_update_task(task, info, 0);
2956 -#endif
2957 -
2958 #ifdef CONFIG_IA32_SUPPORT
2959 if (IS_IA32_PROCESS(task_pt_regs(task)))
2960 ia32_save_state(task);
2961 @@ -369,22 +361,9 @@ ia64_save_extra (struct task_struct *task)
2962 void
2963 ia64_load_extra (struct task_struct *task)
2964 {
2965 -#ifdef CONFIG_PERFMON
2966 - unsigned long info;
2967 -#endif
2968 -
2969 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
2970 ia64_load_debug_regs(&task->thread.dbr[0]);
2971
2972 -#ifdef CONFIG_PERFMON
2973 - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
2974 - pfm_load_regs(task);
2975 -
2976 - info = __get_cpu_var(pfm_syst_info);
2977 - if (info & PFM_CPUINFO_SYST_WIDE)
2978 - pfm_syst_wide_update_task(task, info, 1);
2979 -#endif
2980 -
2981 #ifdef CONFIG_IA32_SUPPORT
2982 if (IS_IA32_PROCESS(task_pt_regs(task)))
2983 ia32_load_state(task);
2984 @@ -510,8 +489,7 @@ copy_thread (int nr, unsigned long clone_flags,
2985 * call behavior where scratch registers are preserved across
2986 * system calls (unless used by the system call itself).
2987 */
2988 -# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
2989 - | IA64_THREAD_PM_VALID)
2990 +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID)
2991 # define THREAD_FLAGS_TO_SET 0
2992 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
2993 | THREAD_FLAGS_TO_SET);
2994 @@ -533,10 +511,8 @@ copy_thread (int nr, unsigned long clone_flags,
2995 }
2996 #endif
2997
2998 -#ifdef CONFIG_PERFMON
2999 - if (current->thread.pfm_context)
3000 - pfm_inherit(p, child_ptregs);
3001 -#endif
3002 + pfm_copy_thread(p);
3003 +
3004 return retval;
3005 }
3006
3007 @@ -745,15 +721,13 @@ exit_thread (void)
3008 {
3009
3010 ia64_drop_fpu(current);
3011 -#ifdef CONFIG_PERFMON
3012 - /* if needed, stop monitoring and flush state to perfmon context */
3013 - if (current->thread.pfm_context)
3014 - pfm_exit_thread(current);
3015 +
3016 + /* if needed, stop monitoring and flush state to perfmon context */
3017 + pfm_exit_thread();
3018
3019 /* free debug register resources */
3020 - if (current->thread.flags & IA64_THREAD_DBG_VALID)
3021 - pfm_release_debug_registers(current);
3022 -#endif
3023 + pfm_release_dbregs(current);
3024 +
3025 if (IS_IA32_PROCESS(task_pt_regs(current)))
3026 ia32_drop_ia64_partial_page_list(current);
3027 }
3028 diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
3029 index 2a9943b..bb1ca1e 100644
3030 --- a/arch/ia64/kernel/ptrace.c
3031 +++ b/arch/ia64/kernel/ptrace.c
3032 @@ -20,6 +20,7 @@
3033 #include <linux/security.h>
3034 #include <linux/audit.h>
3035 #include <linux/signal.h>
3036 +#include <linux/perfmon_kern.h>
3037 #include <linux/regset.h>
3038 #include <linux/elf.h>
3039
3040 @@ -30,9 +31,6 @@
3041 #include <asm/system.h>
3042 #include <asm/uaccess.h>
3043 #include <asm/unwind.h>
3044 -#ifdef CONFIG_PERFMON
3045 -#include <asm/perfmon.h>
3046 -#endif
3047
3048 #include "entry.h"
3049
3050 @@ -2124,7 +2122,6 @@ access_uarea(struct task_struct *child, unsigned long addr,
3051 "address 0x%lx\n", addr);
3052 return -1;
3053 }
3054 -#ifdef CONFIG_PERFMON
3055 /*
3056 * Check if debug registers are used by perfmon. This
3057 * test must be done once we know that we can do the
3058 @@ -2142,9 +2139,8 @@ access_uarea(struct task_struct *child, unsigned long addr,
3059 * IA64_THREAD_DBG_VALID. The registers are restored
3060 * by the PMU context switch code.
3061 */
3062 - if (pfm_use_debug_registers(child))
3063 + if (pfm_use_dbregs(child))
3064 return -1;
3065 -#endif
3066
3067 if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
3068 child->thread.flags |= IA64_THREAD_DBG_VALID;
3069 diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
3070 index de636b2..677fa68 100644
3071 --- a/arch/ia64/kernel/setup.c
3072 +++ b/arch/ia64/kernel/setup.c
3073 @@ -45,6 +45,7 @@
3074 #include <linux/cpufreq.h>
3075 #include <linux/kexec.h>
3076 #include <linux/crash_dump.h>
3077 +#include <linux/perfmon_kern.h>
3078
3079 #include <asm/ia32.h>
3080 #include <asm/machvec.h>
3081 @@ -1051,6 +1052,8 @@ cpu_init (void)
3082 }
3083 platform_cpu_init();
3084 pm_idle = default_idle;
3085 +
3086 + pfm_init_percpu();
3087 }
3088
3089 void __init
3090 diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
3091 index d8f05e5..3d7a739 100644
3092 --- a/arch/ia64/kernel/smpboot.c
3093 +++ b/arch/ia64/kernel/smpboot.c
3094 @@ -39,6 +39,7 @@
3095 #include <linux/efi.h>
3096 #include <linux/percpu.h>
3097 #include <linux/bitops.h>
3098 +#include <linux/perfmon_kern.h>
3099
3100 #include <asm/atomic.h>
3101 #include <asm/cache.h>
3102 @@ -381,10 +382,6 @@ smp_callin (void)
3103 extern void ia64_init_itm(void);
3104 extern volatile int time_keeper_id;
3105
3106 -#ifdef CONFIG_PERFMON
3107 - extern void pfm_init_percpu(void);
3108 -#endif
3109 -
3110 cpuid = smp_processor_id();
3111 phys_id = hard_smp_processor_id();
3112 itc_master = time_keeper_id;
3113 @@ -410,10 +407,6 @@ smp_callin (void)
3114
3115 ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
3116
3117 -#ifdef CONFIG_PERFMON
3118 - pfm_init_percpu();
3119 -#endif
3120 -
3121 local_irq_enable();
3122
3123 if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
3124 @@ -751,6 +744,7 @@ int __cpu_disable(void)
3125 cpu_clear(cpu, cpu_online_map);
3126 local_flush_tlb_all();
3127 cpu_clear(cpu, cpu_callin_map);
3128 + pfm_cpu_disable();
3129 return 0;
3130 }
3131
3132 diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
3133 index bcbb6d8..a0ed33a 100644
3134 --- a/arch/ia64/kernel/sys_ia64.c
3135 +++ b/arch/ia64/kernel/sys_ia64.c
3136 @@ -284,3 +284,11 @@ sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, un
3137 }
3138
3139 #endif /* CONFIG_PCI */
3140 +
3141 +#ifndef CONFIG_IA64_PERFMON_COMPAT
3142 +asmlinkage long
3143 +sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
3144 +{
3145 + return -ENOSYS;
3146 +}
3147 +#endif
3148 diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
3149 index 98771e2..077fd09 100644
3150 --- a/arch/ia64/lib/Makefile
3151 +++ b/arch/ia64/lib/Makefile
3152 @@ -13,7 +13,6 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
3153
3154 obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
3155 obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
3156 -lib-$(CONFIG_PERFMON) += carta_random.o
3157
3158 AFLAGS___divdi3.o =
3159 AFLAGS___udivdi3.o = -DUNSIGNED
3160 diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
3161 index 125a602..892de6a 100644
3162 --- a/arch/ia64/oprofile/init.c
3163 +++ b/arch/ia64/oprofile/init.c
3164 @@ -12,8 +12,8 @@
3165 #include <linux/init.h>
3166 #include <linux/errno.h>
3167
3168 -extern int perfmon_init(struct oprofile_operations * ops);
3169 -extern void perfmon_exit(void);
3170 +extern int op_perfmon_init(struct oprofile_operations * ops);
3171 +extern void op_perfmon_exit(void);
3172 extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
3173
3174 int __init oprofile_arch_init(struct oprofile_operations * ops)
3175 @@ -22,7 +22,7 @@ int __init oprofile_arch_init(struct oprofile_operations * ops)
3176
3177 #ifdef CONFIG_PERFMON
3178 /* perfmon_init() can fail, but we have no way to report it */
3179 - ret = perfmon_init(ops);
3180 + ret = op_perfmon_init(ops);
3181 #endif
3182 ops->backtrace = ia64_backtrace;
3183
3184 @@ -33,6 +33,6 @@ int __init oprofile_arch_init(struct oprofile_operations * ops)
3185 void oprofile_arch_exit(void)
3186 {
3187 #ifdef CONFIG_PERFMON
3188 - perfmon_exit();
3189 + op_perfmon_exit();
3190 #endif
3191 }
3192 diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
3193 index bc41dd3..6fa9d17 100644
3194 --- a/arch/ia64/oprofile/perfmon.c
3195 +++ b/arch/ia64/oprofile/perfmon.c
3196 @@ -10,25 +10,30 @@
3197 #include <linux/kernel.h>
3198 #include <linux/oprofile.h>
3199 #include <linux/sched.h>
3200 -#include <asm/perfmon.h>
3201 +#include <linux/module.h>
3202 +#include <linux/perfmon_kern.h>
3203 #include <asm/ptrace.h>
3204 #include <asm/errno.h>
3205
3206 static int allow_ints;
3207
3208 static int
3209 -perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
3210 - struct pt_regs *regs, unsigned long stamp)
3211 +perfmon_handler(struct pfm_context *ctx,
3212 + unsigned long ip, u64 stamp, void *data)
3213 {
3214 - int event = arg->pmd_eventid;
3215 + struct pt_regs *regs;
3216 + struct pfm_ovfl_arg *arg;
3217 +
3218 + regs = data;
3219 + arg = &ctx->ovfl_arg;
3220
3221 - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
3222 + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
3223
3224 /* the owner of the oprofile event buffer may have exited
3225 * without perfmon being shutdown (e.g. SIGSEGV)
3226 */
3227 if (allow_ints)
3228 - oprofile_add_sample(regs, event);
3229 + oprofile_add_sample(regs, arg->pmd_eventid);
3230 return 0;
3231 }
3232
3233 @@ -45,17 +50,13 @@ static void perfmon_stop(void)
3234 allow_ints = 0;
3235 }
3236
3237 -
3238 -#define OPROFILE_FMT_UUID { \
3239 - 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
3240 -
3241 -static pfm_buffer_fmt_t oprofile_fmt = {
3242 - .fmt_name = "oprofile_format",
3243 - .fmt_uuid = OPROFILE_FMT_UUID,
3244 - .fmt_handler = perfmon_handler,
3245 +static struct pfm_smpl_fmt oprofile_fmt = {
3246 + .fmt_name = "OProfile",
3247 + .fmt_handler = perfmon_handler,
3248 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
3249 + .owner = THIS_MODULE
3250 };
3251
3252 -
3253 static char * get_cpu_type(void)
3254 {
3255 __u8 family = local_cpu_data->family;
3256 @@ -75,9 +76,9 @@ static char * get_cpu_type(void)
3257
3258 static int using_perfmon;
3259
3260 -int perfmon_init(struct oprofile_operations * ops)
3261 +int __init op_perfmon_init(struct oprofile_operations * ops)
3262 {
3263 - int ret = pfm_register_buffer_fmt(&oprofile_fmt);
3264 + int ret = pfm_fmt_register(&oprofile_fmt);
3265 if (ret)
3266 return -ENODEV;
3267
3268 @@ -90,10 +91,10 @@ int perfmon_init(struct oprofile_operations * ops)
3269 }
3270
3271
3272 -void perfmon_exit(void)
3273 +void __exit op_perfmon_exit(void)
3274 {
3275 if (!using_perfmon)
3276 return;
3277
3278 - pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
3279 + pfm_fmt_unregister(&oprofile_fmt);
3280 }
3281 diff --git a/arch/ia64/perfmon/Kconfig b/arch/ia64/perfmon/Kconfig
3282 new file mode 100644
3283 index 0000000..99c68bd
3284 --- /dev/null
3285 +++ b/arch/ia64/perfmon/Kconfig
3286 @@ -0,0 +1,67 @@
3287 +menu "Hardware Performance Monitoring support"
3288 +config PERFMON
3289 + bool "Perfmon2 performance monitoring interface"
3290 + default n
3291 + help
3292 + Enables the perfmon2 interface to access the hardware
3293 + performance counters. See <http://perfmon2.sf.net/> for
3294 + more details.
3295 +
3296 +config PERFMON_DEBUG
3297 + bool "Perfmon debugging"
3298 + default n
3299 + depends on PERFMON
3300 + help
3301 + Enables perfmon debugging support
3302 +
3303 +config PERFMON_DEBUG_FS
3304 + bool "Enable perfmon statistics reporting via debugfs"
3305 + default y
3306 + depends on PERFMON && DEBUG_FS
3307 + help
3308 + Enable collection and reporting of perfmon timing statistics under
3309 + debugfs. This is used for debugging and performance analysis of the
3310 + subsystem. The debugfs filesystem must be mounted.
3311 +
3312 +config IA64_PERFMON_COMPAT
3313 + bool "Enable old perfmon-2 compatibility mode"
3314 + default n
3315 + depends on PERFMON
3316 + help
3317 + Enable this option to allow performance tools which used the old
3318 + perfmon-2 interface to continue to work. Old tools are those using
3319 + the obsolete commands and arguments. Check your programs and look
3320 + in include/asm-ia64/perfmon_compat.h for more information.
3321 +
3322 +config IA64_PERFMON_GENERIC
3323 + tristate "Generic IA-64 PMU support"
3324 + depends on PERFMON
3325 + default n
3326 + help
3327 + Enables generic IA-64 PMU support.
3328 + The generic PMU is defined by the IA-64 architecture document.
3329 + This option should only be necessary when running with a PMU that
3330 + is not yet explicitly supported. Even then, there is no guarantee
3331 + that this support will work.
3332 +
3333 +config IA64_PERFMON_ITANIUM
3334 + tristate "Itanium (Merced) Performance Monitoring support"
3335 + depends on PERFMON
3336 + default n
3337 + help
3338 + Enables Itanium (Merced) PMU support.
3339 +
3340 +config IA64_PERFMON_MCKINLEY
3341 + tristate "Itanium 2 (McKinley) Performance Monitoring support"
3342 + depends on PERFMON
3343 + default n
3344 + help
3345 + Enables Itanium 2 (McKinley, Madison, Deerfield) PMU support.
3346 +
3347 +config IA64_PERFMON_MONTECITO
3348 + tristate "Itanium 2 9000 (Montecito) Performance Monitoring support"
3349 + depends on PERFMON
3350 + default n
3351 + help
3352 + Enables support for Itanium 2 9000 (Montecito) PMU.
3353 +endmenu
3354 diff --git a/arch/ia64/perfmon/Makefile b/arch/ia64/perfmon/Makefile
3355 new file mode 100644
3356 index 0000000..c9cdf9f
3357 --- /dev/null
3358 +++ b/arch/ia64/perfmon/Makefile
3359 @@ -0,0 +1,11 @@
3360 +#
3361 +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
3362 +# Contributed by Stephane Eranian <eranian@hpl.hp.com>
3363 +#
3364 +obj-$(CONFIG_PERFMON) += perfmon.o
3365 +obj-$(CONFIG_IA64_PERFMON_COMPAT) += perfmon_default_smpl.o \
3366 + perfmon_compat.o
3367 +obj-$(CONFIG_IA64_PERFMON_GENERIC) += perfmon_generic.o
3368 +obj-$(CONFIG_IA64_PERFMON_ITANIUM) += perfmon_itanium.o
3369 +obj-$(CONFIG_IA64_PERFMON_MCKINLEY) += perfmon_mckinley.o
3370 +obj-$(CONFIG_IA64_PERFMON_MONTECITO) += perfmon_montecito.o
3371 diff --git a/arch/ia64/perfmon/perfmon.c b/arch/ia64/perfmon/perfmon.c
3372 new file mode 100644
3373 index 0000000..3f59410
3374 --- /dev/null
3375 +++ b/arch/ia64/perfmon/perfmon.c
3376 @@ -0,0 +1,946 @@
3377 +/*
3378 + * This file implements the IA-64 specific
3379 + * support for the perfmon2 interface
3380 + *
3381 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
3382 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
3383 + *
3384 + * This program is free software; you can redistribute it and/or
3385 + * modify it under the terms of version 2 of the GNU General Public
3386 + * License as published by the Free Software Foundation.
3387 + *
3388 + * This program is distributed in the hope that it will be useful,
3389 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3390 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3391 + * General Public License for more details.
3392 + *
3393 + * You should have received a copy of the GNU General Public License
3394 + * along with this program; if not, write to the Free Software
3395 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
3396 + * 02111-1307 USA
3397 + */
3398 +#include <linux/module.h>
3399 +#include <linux/perfmon_kern.h>
3400 +
3401 +struct pfm_arch_session {
3402 + u32 pfs_sys_use_dbr; /* syswide session uses dbr */
3403 + u32 pfs_ptrace_use_dbr; /* a thread uses dbr via ptrace()*/
3404 +};
3405 +
3406 +DEFINE_PER_CPU(u32, pfm_syst_info);
3407 +
3408 +static struct pfm_arch_session pfm_arch_sessions;
3409 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_arch_sessions_lock);
3410 +
3411 +static inline void pfm_clear_psr_pp(void)
3412 +{
3413 + ia64_rsm(IA64_PSR_PP);
3414 +}
3415 +
3416 +static inline void pfm_set_psr_pp(void)
3417 +{
3418 + ia64_ssm(IA64_PSR_PP);
3419 +}
3420 +
3421 +static inline void pfm_clear_psr_up(void)
3422 +{
3423 + ia64_rsm(IA64_PSR_UP);
3424 +}
3425 +
3426 +static inline void pfm_set_psr_up(void)
3427 +{
3428 + ia64_ssm(IA64_PSR_UP);
3429 +}
3430 +
3431 +static inline void pfm_set_psr_l(u64 val)
3432 +{
3433 + ia64_setreg(_IA64_REG_PSR_L, val);
3434 +}
3435 +
3436 +static inline void pfm_restore_ibrs(u64 *ibrs, unsigned int nibrs)
3437 +{
3438 + unsigned int i;
3439 +
3440 + for (i = 0; i < nibrs; i++) {
3441 + ia64_set_ibr(i, ibrs[i]);
3442 + ia64_dv_serialize_instruction();
3443 + }
3444 + ia64_srlz_i();
3445 +}
3446 +
3447 +static inline void pfm_restore_dbrs(u64 *dbrs, unsigned int ndbrs)
3448 +{
3449 + unsigned int i;
3450 +
3451 + for (i = 0; i < ndbrs; i++) {
3452 + ia64_set_dbr(i, dbrs[i]);
3453 + ia64_dv_serialize_data();
3454 + }
3455 + ia64_srlz_d();
3456 +}
3457 +
3458 +irqreturn_t pmu_interrupt_handler(int irq, void *arg)
3459 +{
3460 + struct pt_regs *regs;
3461 + regs = get_irq_regs();
3462 + irq_enter();
3463 + pfm_interrupt_handler(instruction_pointer(regs), regs);
3464 + irq_exit();
3465 + return IRQ_HANDLED;
3466 +}
3467 +static struct irqaction perfmon_irqaction = {
3468 + .handler = pmu_interrupt_handler,
3469 + .flags = IRQF_DISABLED, /* means keep interrupts masked */
3470 + .name = "perfmon"
3471 +};
3472 +
3473 +void pfm_arch_quiesce_pmu_percpu(void)
3474 +{
3475 + u64 dcr;
3476 + /*
3477 + * make sure no measurement is active
3478 + * (may inherit programmed PMCs from EFI).
3479 + */
3480 + pfm_clear_psr_pp();
3481 + pfm_clear_psr_up();
3482 +
3483 + /*
3484 + * ensure dcr.pp is cleared
3485 + */
3486 + dcr = ia64_getreg(_IA64_REG_CR_DCR);
3487 + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
3488 +
3489 + /*
3490 + * we run with the PMU not frozen at all times
3491 + */
3492 + ia64_set_pmc(0, 0);
3493 + ia64_srlz_d();
3494 +}
3495 +
3496 +void pfm_arch_init_percpu(void)
3497 +{
3498 + pfm_arch_quiesce_pmu_percpu();
3499 + /*
3500 + * program PMU interrupt vector
3501 + */
3502 + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
3503 + ia64_srlz_d();
3504 +}
3505 +
3506 +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
3507 +{
3508 + struct pfm_arch_context *ctx_arch;
3509 +
3510 + ctx_arch = pfm_ctx_arch(ctx);
3511 +
3512 + ctx_arch->flags.use_dbr = 0;
3513 + ctx_arch->flags.insecure = (ctx_flags & PFM_ITA_FL_INSECURE) ? 1: 0;
3514 +
3515 + PFM_DBG("insecure=%d", ctx_arch->flags.insecure);
3516 +
3517 + return 0;
3518 +}
3519 +
3520 +/*
3521 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
3522 + * Context is locked. Interrupts are masked. Monitoring may be active.
3523 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
3524 + *
3525 + * Return:
3526 + * non-zero : did not save PMDs (as part of stopping the PMU)
3527 + * 0 : saved PMDs (no need to save them in caller)
3528 + */
3529 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
3530 +{
3531 + struct pfm_arch_context *ctx_arch;
3532 + struct pfm_event_set *set;
3533 + u64 psr, tmp;
3534 +
3535 + ctx_arch = pfm_ctx_arch(ctx);
3536 + set = ctx->active_set;
3537 +
3538 + /*
3539 + * save current PSR: needed because we modify it
3540 + */
3541 + ia64_srlz_d();
3542 + psr = ia64_getreg(_IA64_REG_PSR);
3543 +
3544 + /*
3545 + * stop monitoring:
3546 + * This is the last instruction which may generate an overflow
3547 + *
3548 + * we do not clear ipsr.up
3549 + */
3550 + pfm_clear_psr_up();
3551 + ia64_srlz_d();
3552 +
3553 + /*
3554 + * extract overflow status bits
3555 + */
3556 + tmp = ia64_get_pmc(0) & ~0xf;
3557 +
3558 + /*
3559 + * keep a copy of psr.up (for reload)
3560 + */
3561 + ctx_arch->ctx_saved_psr_up = psr & IA64_PSR_UP;
3562 +
3563 + /*
3564 + * save overflow status bits
3565 + */
3566 + set->povfl_pmds[0] = tmp;
3567 +
3568 + /*
3569 + * record how many pending overflows
3570 + * XXX: assume identity mapping for counters
3571 + */
3572 + set->npend_ovfls = ia64_popcnt(tmp);
3573 +
3574 + /*
3575 + * make sure the PMU is unfrozen for the next task
3576 + */
3577 + if (set->npend_ovfls) {
3578 + ia64_set_pmc(0, 0);
3579 + ia64_srlz_d();
3580 + }
3581 + return 1;
3582 +}
3583 +
3584 +/*
3585 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
3586 + * set cannot be NULL. Context is locked. Interrupts are masked.
3587 + * Caller has already restored all PMD and PMC registers.
3588 + *
3589 + * must reactivate monitoring
3590 + */
3591 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
3592 +{
3593 + struct pfm_arch_context *ctx_arch;
3594 +
3595 + ctx_arch = pfm_ctx_arch(ctx);
3596 +
3597 + /*
3598 + * when monitoring is not explicitly started
3599 + * then psr_up = 0, in which case we do not
3600 + * need to restore
3601 + */
3602 + if (likely(ctx_arch->ctx_saved_psr_up)) {
3603 + pfm_set_psr_up();
3604 + ia64_srlz_d();
3605 + }
3606 +}
3607 +
3608 +int pfm_arch_reserve_session(struct pfm_context *ctx, u32 cpu)
3609 +{
3610 + struct pfm_arch_context *ctx_arch;
3611 + int is_system;
3612 + int ret = 0;
3613 +
3614 + ctx_arch = pfm_ctx_arch(ctx);
3615 + is_system = ctx->flags.system;
3616 +
3617 + spin_lock(&pfm_arch_sessions_lock);
3618 +
3619 + if (is_system && ctx_arch->flags.use_dbr) {
3620 + PFM_DBG("syswide context uses dbregs");
3621 +
3622 + if (pfm_arch_sessions.pfs_ptrace_use_dbr) {
3623 + PFM_DBG("cannot reserve syswide context: "
3624 + "dbregs in use by ptrace");
3625 + ret = -EBUSY;
3626 + } else {
3627 + pfm_arch_sessions.pfs_sys_use_dbr++;
3628 + }
3629 + }
3630 + spin_unlock(&pfm_arch_sessions_lock);
3631 +
3632 + return ret;
3633 +}
3634 +
3635 +void pfm_arch_release_session(struct pfm_context *ctx, u32 cpu)
3636 +{
3637 + struct pfm_arch_context *ctx_arch;
3638 + int is_system;
3639 +
3640 + ctx_arch = pfm_ctx_arch(ctx);
3641 + is_system = ctx->flags.system;
3642 +
3643 + spin_lock(&pfm_arch_sessions_lock);
3644 +
3645 + if (is_system && ctx_arch->flags.use_dbr)
3646 + pfm_arch_sessions.pfs_sys_use_dbr--;
3647 + spin_unlock(&pfm_arch_sessions_lock);
3648 +}
3649 +
3650 +/*
3651 + * function called from pfm_load_context_*(). Task is not guaranteed to be
3652 + * current task. If not then other task is guaranteed stopped and off any CPU.
3653 + * context is locked and interrupts are masked.
3654 + *
3655 + * On PFM_LOAD_CONTEXT, the interface guarantees monitoring is stopped.
3656 + *
3657 + * For system-wide task is NULL
3658 + */
3659 +int pfm_arch_load_context(struct pfm_context *ctx)
3660 +{
3661 + struct pfm_arch_context *ctx_arch;
3662 + struct pt_regs *regs;
3663 + int ret = 0;
3664 +
3665 + ctx_arch = pfm_ctx_arch(ctx);
3666 +
3667 + /*
3668 + * cannot load a context which is using range restrictions,
3669 + * into a thread that is being debugged.
3670 + *
3671 + * if one set out of several is using the debug registers, then
3672 + * we assume the context as a whole is using them.
3673 + */
3674 + if (ctx_arch->flags.use_dbr) {
3675 + if (ctx->flags.system) {
3676 + spin_lock(&pfm_arch_sessions_lock);
3677 +
3678 + if (pfm_arch_sessions.pfs_ptrace_use_dbr) {
3679 + PFM_DBG("cannot reserve syswide context: "
3680 + "dbregs in use by ptrace");
3681 + ret = -EBUSY;
3682 + } else {
3683 + pfm_arch_sessions.pfs_sys_use_dbr++;
3684 + PFM_DBG("pfs_sys_use_dbr=%u",
3685 + pfm_arch_sessions.pfs_sys_use_dbr);
3686 + }
3687 + spin_unlock(&pfm_arch_sessions_lock);
3688 +
3689 + } else if (ctx->task->thread.flags & IA64_THREAD_DBG_VALID) {
3690 + PFM_DBG("load_pid [%d] thread is debugged, cannot "
3691 + "use range restrictions", ctx->task->pid);
3692 + ret = -EBUSY;
3693 + }
3694 + if (ret)
3695 + return ret;
3696 + }
3697 +
3698 + /*
3699 + * We need to intervene on context switch to toggle the
3700 + * psr.pp bit in system-wide. As such, we set the TIF
3701 + * flag so that pfm_arch_ctxswout_sys() and the
3702 + * pfm_arch_ctxswin_sys() functions get called
3703 + * from pfm_ctxsw_sys();
3704 + */
3705 + if (ctx->flags.system) {
3706 + set_thread_flag(TIF_PERFMON_CTXSW);
3707 + PFM_DBG("[%d] set TIF", current->pid);
3708 + return 0;
3709 + }
3710 +
3711 + regs = task_pt_regs(ctx->task);
3712 +
3713 + /*
3714 + * self-monitoring systematically allows user level control
3715 + */
3716 + if (ctx->task != current) {
3717 + /*
3718 + * when not current, task is stopped, so this is safe
3719 + */
3720 + ctx_arch->ctx_saved_psr_up = 0;
3721 + ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
3722 + } else
3723 + ctx_arch->flags.insecure = 1;
3724 +
3725 + /*
3726 + * allow user level control (start/stop/read pmd) if:
3727 + * - self-monitoring
3728 + * - requested at context creation (PFM_IA64_FL_INSECURE)
3729 + *
3730 + * There is no security hole with PFM_IA64_FL_INSECURE because
3731 + * when not self-monitored, the caller must have permission to
3732 + * attach to the task.
3733 + */
3734 + if (ctx_arch->flags.insecure) {
3735 + ia64_psr(regs)->sp = 0;
3736 + PFM_DBG("clearing psr.sp for [%d]", ctx->task->pid);
3737 + }
3738 + return 0;
3739 +}
3740 +
3741 +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
3742 +{
3743 +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH)
3744 +#define PFM_ITA_SETFL_BOTH_INTR (PFM_ITA_SETFL_INTR_ONLY|\
3745 + PFM_ITA_SETFL_EXCL_INTR)
3746 +
3747 +/* exclude return value field */
3748 +#define PFM_SETFL_ALL_MASK (PFM_ITA_SETFL_BOTH_INTR \
3749 + | PFM_SETFL_BOTH_SWITCH \
3750 + | PFM_ITA_SETFL_IDLE_EXCL)
3751 +
3752 + if ((flags & ~PFM_SETFL_ALL_MASK)) {
3753 + PFM_DBG("invalid flags=0x%x", flags);
3754 + return -EINVAL;
3755 + }
3756 +
3757 + if ((flags & PFM_ITA_SETFL_BOTH_INTR) == PFM_ITA_SETFL_BOTH_INTR) {
3758 + PFM_DBG("both excl intr and ontr only are set");
3759 + return -EINVAL;
3760 + }
3761 +
3762 + if ((flags & PFM_ITA_SETFL_IDLE_EXCL) && !ctx->flags.system) {
3763 + PFM_DBG("idle exclude flag only for system-wide context");
3764 + return -EINVAL;
3765 + }
3766 + return 0;
3767 +}
3768 +
3769 +/*
3770 + * function called from pfm_unload_context_*(). Context is locked.
3771 + * interrupts are masked. task is not guaranteed to be current task.
3772 + * Access to PMU is not guaranteed.
3773 + *
3774 + * function must do whatever arch-specific action is required on unload
3775 + * of a context.
3776 + *
3777 + * called for both system-wide and per-thread. task is NULL for system-wide
3778 + */
3779 +void pfm_arch_unload_context(struct pfm_context *ctx)
3780 +{
3781 + struct pfm_arch_context *ctx_arch;
3782 + struct pt_regs *regs;
3783 +
3784 + ctx_arch = pfm_ctx_arch(ctx);
3785 +
3786 + if (ctx->flags.system) {
3787 + /*
3788 + * disable context switch hook
3789 + */
3790 + clear_thread_flag(TIF_PERFMON_CTXSW);
3791 +
3792 + if (ctx_arch->flags.use_dbr) {
3793 + spin_lock(&pfm_arch_sessions_lock);
3794 + pfm_arch_sessions.pfs_sys_use_dbr--;
3795 + PFM_DBG("sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr);
3796 + spin_unlock(&pfm_arch_sessions_lock);
3797 + }
3798 + } else {
3799 + regs = task_pt_regs(ctx->task);
3800 +
3801 + /*
3802 + * cancel user level control for per-task context
3803 + */
3804 + ia64_psr(regs)->sp = 1;
3805 + PFM_DBG("setting psr.sp for [%d]", ctx->task->pid);
3806 + }
3807 +}
3808 +
3809 +/*
3810 + * mask monitoring by setting the privilege level to 0
3811 + * we cannot use psr.pp/psr.up for this, it is controlled by
3812 + * the user
3813 + */
3814 +void pfm_arch_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
3815 +{
3816 + struct pfm_arch_pmu_info *arch_info;
3817 + unsigned long mask;
3818 + unsigned int i;
3819 +
3820 + arch_info = pfm_pmu_info();
3821 + /*
3822 + * as an optimization we look at the first 64 PMC
3823 + * registers only starting at PMC4.
3824 + */
3825 + mask = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR;
3826 + for (i = PFM_ITA_FCNTR; mask; i++, mask >>= 1) {
3827 + if (likely(mask & 0x1))
3828 + ia64_set_pmc(i, set->pmcs[i] & ~0xfUL);
3829 + }
3830 + /*
3831 + * make changes visible
3832 + */
3833 + ia64_srlz_d();
3834 +}
3835 +
3836 +/*
3837 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
3838 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
3839 + * context is locked. Interrupts are masked. set cannot be NULL.
3840 + * Access to the PMU is guaranteed.
3841 + *
3842 + * function must restore all PMD registers from set.
3843 + */
3844 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
3845 +{
3846 + struct pfm_arch_context *ctx_arch;
3847 + unsigned long *mask;
3848 + u16 i, num;
3849 +
3850 + ctx_arch = pfm_ctx_arch(ctx);
3851 +
3852 + if (ctx_arch->flags.insecure) {
3853 + num = ctx->regs.num_rw_pmd;
3854 + mask = ctx->regs.rw_pmds;
3855 + } else {
3856 + num = set->nused_pmds;
3857 + mask = set->used_pmds;
3858 + }
3859 + /*
3860 + * must restore all implemented read-write PMDS to avoid leaking
3861 + * information especially when PFM_IA64_FL_INSECURE is set.
3862 + *
3863 + * XXX: should check PFM_IA64_FL_INSECURE==0 and use used_pmd instead
3864 + */
3865 + for (i = 0; num; i++) {
3866 + if (likely(test_bit(i, mask))) {
3867 + pfm_arch_write_pmd(ctx, i, set->pmds[i].value);
3868 + num--;
3869 + }
3870 + }
3871 + ia64_srlz_d();
3872 +}
3873 +
3874 +/*
3875 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
3876 + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
3877 + * context is locked. Interrupts are masked. set cannot be NULL.
3878 + * Access to the PMU is guaranteed.
3879 + *
3880 + * function must restore all PMC registers from set if needed
3881 + */
3882 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
3883 +{
3884 + struct pfm_arch_pmu_info *arch_info;
3885 + u64 mask2 = 0, val, plm;
3886 + unsigned long impl_mask, mask_pmcs;
3887 + unsigned int i;
3888 +
3889 + arch_info = pfm_pmu_info();
3890 + /*
3891 + * as an optimization we only look at the first 64
3892 + * PMC registers. In fact, we should never scan the
3893 + * entire impl_pmcs because ibr/dbr are implemented
3894 + * separately.
3895 + *
3896 + * always skip PMC0-PMC3. PMC0 taken care of when saving
3897 + * state. PMC1-PMC3 not used until we get counters in
3898 + * the 60 and above index range.
3899 + */
3900 + impl_mask = ctx->regs.pmcs[0] >> PFM_ITA_FCNTR;
3901 + mask_pmcs = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR;
3902 + plm = ctx->state == PFM_CTX_MASKED ? ~0xf : ~0x0;
3903 +
3904 + for (i = PFM_ITA_FCNTR;
3905 + impl_mask;
3906 + i++, impl_mask >>= 1, mask_pmcs >>= 1) {
3907 + if (likely(impl_mask & 0x1)) {
3908 + mask2 = mask_pmcs & 0x1 ? plm : ~0;
3909 + val = set->pmcs[i] & mask2;
3910 + ia64_set_pmc(i, val);
3911 + PFM_DBG_ovfl("pmc%u=0x%lx", i, val);
3912 + }
3913 + }
3914 + /*
3915 + * restore DBR/IBR
3916 + */
3917 + if (set->priv_flags & PFM_ITA_SETFL_USE_DBR) {
3918 + pfm_restore_ibrs(set->pmcs+256, 8);
3919 + pfm_restore_dbrs(set->pmcs+264, 8);
3920 + }
3921 + ia64_srlz_d();
3922 +}
3923 +
3924 +void pfm_arch_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
3925 +{
3926 + u64 psr;
3927 + int is_system;
3928 +
3929 + is_system = ctx->flags.system;
3930 +
3931 + psr = ia64_getreg(_IA64_REG_PSR);
3932 +
3933 + /*
3934 + * monitoring is masked via the PMC.plm
3935 + *
3936 + * As we restore their value, we do not want each counter to
3937 + * restart right away. We stop monitoring using the PSR,
3938 + * restore the PMC (and PMD) and then re-establish the psr
3939 + * as it was. Note that there can be no pending overflow at
3940 + * this point, because monitoring is still MASKED.
3941 + *
3942 + * Because interrupts are masked we can avoid changing
3943 + * DCR.pp.
3944 + */
3945 + if (is_system)
3946 + pfm_clear_psr_pp();
3947 + else
3948 + pfm_clear_psr_up();
3949 +
3950 + ia64_srlz_d();
3951 +
3952 + pfm_arch_restore_pmcs(ctx, set);
3953 +
3954 + /*
3955 + * restore psr
3956 + *
3957 + * monitoring may start right now but interrupts
3958 + * are still masked
3959 + */
3960 + pfm_set_psr_l(psr);
3961 + ia64_srlz_d();
3962 +}
3963 +
3964 +/*
3965 + * Called from pfm_stop()
3966 + *
3967 + * For per-thread:
3968 + * task is not necessarily current. If not current task, then
3969 + * task is guaranteed stopped and off any cpu. Access to PMU
3970 + * is not guaranteed. Interrupts are masked. Context is locked.
3971 + * Set is the active set.
3972 + *
3973 + * must disable active monitoring. ctx cannot be NULL
3974 + */
3975 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
3976 +{
3977 + struct pfm_arch_context *ctx_arch;
3978 + struct pt_regs *regs;
3979 + u64 dcr, psr;
3980 +
3981 + ctx_arch = pfm_ctx_arch(ctx);
3982 + regs = task_pt_regs(task);
3983 +
3984 + if (!ctx->flags.system) {
3985 + /*
3986 + * in ZOMBIE state we always have task == current due to
3987 + * pfm_exit_thread()
3988 + */
3989 + ia64_psr(regs)->up = 0;
3990 + ctx_arch->ctx_saved_psr_up = 0;
3991 +
3992 + /*
3993 + * in case of ZOMBIE state, there is no unload to clear
3994 + * insecure monitoring, so we do it in stop instead.
3995 + */
3996 + if (ctx->state == PFM_CTX_ZOMBIE)
3997 + ia64_psr(regs)->sp = 1;
3998 +
3999 + if (task == current) {
4000 + pfm_clear_psr_up();
4001 + ia64_srlz_d();
4002 + }
4003 + } else if (ctx->flags.started) { /* do not stop twice */
4004 + dcr = ia64_getreg(_IA64_REG_CR_DCR);
4005 + psr = ia64_getreg(_IA64_REG_PSR);
4006 +
4007 + ia64_psr(regs)->pp = 0;
4008 + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
4009 + pfm_clear_psr_pp();
4010 + ia64_srlz_d();
4011 +
4012 + if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) {
4013 + PFM_DBG("disabling idle exclude");
4014 + __get_cpu_var(pfm_syst_info) &= ~PFM_ITA_CPUINFO_IDLE_EXCL;
4015 + }
4016 + }
4017 +}
4018 +
4019 +/*
4020 + * called from pfm_start()
4021 + *
4022 + * Interrupts are masked. Context is locked. Set is the active set.
4023 + *
4024 + * For per-thread:
4025 + * Task is not necessarily current. If not current task, then task
4026 + * is guaranteed stopped and off any cpu. No access to PMU if task
4027 + * is not current.
4028 + *
4029 + * For system-wide:
4030 + * task is always current
4031 + *
4032 + * must enable active monitoring.
4033 + */
4034 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
4035 +{
4036 + struct pfm_arch_context *ctx_arch;
4037 + struct pt_regs *regs;
4038 + u64 dcr, dcr_pp, psr_pp;
4039 + u32 flags;
4040 +
4041 + ctx_arch = pfm_ctx_arch(ctx);
4042 + regs = task_pt_regs(task);
4043 + flags = ctx->active_set->flags;
4044 +
4045 + /*
4046 + * per-thread mode
4047 + */
4048 + if (!ctx->flags.system) {
4049 +
4050 + ia64_psr(regs)->up = 1;
4051 +
4052 + if (task == current) {
4053 + pfm_set_psr_up();
4054 + ia64_srlz_d();
4055 + } else {
4056 + /*
4057 + * activate monitoring at next ctxswin
4058 + */
4059 + ctx_arch->ctx_saved_psr_up = IA64_PSR_UP;
4060 + }
4061 + return;
4062 + }
4063 +
4064 + /*
4065 + * system-wide mode
4066 + */
4067 + dcr = ia64_getreg(_IA64_REG_CR_DCR);
4068 + if (flags & PFM_ITA_SETFL_INTR_ONLY) {
4069 + dcr_pp = 1;
4070 + psr_pp = 0;
4071 + } else if (flags & PFM_ITA_SETFL_EXCL_INTR) {
4072 + dcr_pp = 0;
4073 + psr_pp = 1;
4074 + } else {
4075 + dcr_pp = psr_pp = 1;
4076 + }
4077 + PFM_DBG("dcr_pp=%lu psr_pp=%lu", dcr_pp, psr_pp);
4078 +
4079 + /*
4080 + * update dcr_pp and psr_pp
4081 + */
4082 + if (dcr_pp)
4083 + ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP);
4084 + else
4085 + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
4086 +
4087 + if (psr_pp) {
4088 + pfm_set_psr_pp();
4089 + ia64_psr(regs)->pp = 1;
4090 + } else {
4091 + pfm_clear_psr_pp();
4092 + ia64_psr(regs)->pp = 0;
4093 + }
4094 + ia64_srlz_d();
4095 +
4096 + if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) {
4097 + PFM_DBG("enable idle exclude");
4098 + __get_cpu_var(pfm_syst_info) |= PFM_ITA_CPUINFO_IDLE_EXCL;
4099 + }
4100 +}
4101 +
4102 +/*
4103 + * Only call this function when a process is trying to
4104 + * write the debug registers (reading is always allowed)
4105 + * called from arch/ia64/kernel/ptrace.c:access_uarea()
4106 + */
4107 +int __pfm_use_dbregs(struct task_struct *task)
4108 +{
4109 + struct pfm_arch_context *ctx_arch;
4110 + struct pfm_context *ctx;
4111 + unsigned long flags;
4112 + int ret = 0;
4113 +
4114 + PFM_DBG("called for [%d]", task->pid);
4115 +
4116 + ctx = task->pfm_context;
4117 +
4118 + /*
4119 + * do it only once
4120 + */
4121 + if (task->thread.flags & IA64_THREAD_DBG_VALID) {
4122 + PFM_DBG("IA64_THREAD_DBG_VALID already set");
4123 + return 0;
4124 + }
4125 + if (ctx) {
4126 + spin_lock_irqsave(&ctx->lock, flags);
4127 + ctx_arch = pfm_ctx_arch(ctx);
4128 +
4129 + if (ctx_arch->flags.use_dbr == 1) {
4130 + PFM_DBG("PMU using dbregs already, no ptrace access");
4131 + ret = -1;
4132 + }
4133 + spin_unlock_irqrestore(&ctx->lock, flags);
4134 + if (ret)
4135 + return ret;
4136 + }
4137 +
4138 + spin_lock(&pfm_arch_sessions_lock);
4139 +
4140 + /*
4141 + * We cannot allow setting breakpoints when system wide monitoring
4142 + * sessions are using the debug registers.
4143 + */
4144 + if (!pfm_arch_sessions.pfs_sys_use_dbr)
4145 + pfm_arch_sessions.pfs_ptrace_use_dbr++;
4146 + else
4147 + ret = -1;
4148 +
4149 + PFM_DBG("ptrace_use_dbr=%u sys_use_dbr=%u by [%d] ret = %d",
4150 + pfm_arch_sessions.pfs_ptrace_use_dbr,
4151 + pfm_arch_sessions.pfs_sys_use_dbr,
4152 + task->pid, ret);
4153 +
4154 + spin_unlock(&pfm_arch_sessions_lock);
4155 + if (ret)
4156 + return ret;
4157 +#ifndef CONFIG_SMP
4158 + /*
4159 + * in UP, we need to check whether the current
4160 + * owner of the PMU is not using the debug registers
4161 + * for monitoring. Because we are using a lazy
4162 + * save on ctxswout, we must force a save in this
4163 + * case because the debug registers are being
4164 + * modified by another task. We save the current
4165 + * PMD registers, and clear ownership. In ctxswin,
4166 + * full state will be reloaded.
4167 + *
4168 + * Note: we overwrite task.
4169 + */
4170 + task = __get_cpu_var(pmu_owner);
4171 + ctx = __get_cpu_var(pmu_ctx);
4172 +
4173 + if (task == NULL)
4174 + return 0;
4175 +
4176 + ctx_arch = pfm_ctx_arch(ctx);
4177 +
4178 + if (ctx_arch->flags.use_dbr)
4179 + pfm_save_pmds_release(ctx);
4180 +#endif
4181 + return 0;
4182 +}
4183 +
4184 +/*
4185 + * This function is called for every task that exits with the
4186 + * IA64_THREAD_DBG_VALID set. This indicates a task which was
4187 + * able to use the debug registers for debugging purposes via
4188 + * ptrace(). Therefore we know it was not using them for
4189 + * performance monitoring, so we only decrement the number
4190 + * of "ptraced" debug register users to keep the count up to date
4191 + */
4192 +int __pfm_release_dbregs(struct task_struct *task)
4193 +{
4194 + int ret;
4195 +
4196 + spin_lock(&pfm_arch_sessions_lock);
4197 +
4198 + if (pfm_arch_sessions.pfs_ptrace_use_dbr == 0) {
4199 + PFM_ERR("invalid release for [%d] ptrace_use_dbr=0", task->pid);
4200 + ret = -1;
4201 + } else {
4202 + pfm_arch_sessions.pfs_ptrace_use_dbr--;
4203 + ret = 0;
4204 + }
4205 + spin_unlock(&pfm_arch_sessions_lock);
4206 +
4207 + return ret;
4208 +}
4209 +
4210 +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
4211 + struct pfm_event_set *set)
4212 +{
4213 + struct pfm_arch_context *ctx_arch;
4214 + struct task_struct *task;
4215 + struct thread_struct *thread;
4216 + int ret = 0, state;
4217 + int i, can_access_pmu = 0;
4218 + int is_loaded, is_system;
4219 +
4220 + ctx_arch = pfm_ctx_arch(ctx);
4221 + state = ctx->state;
4222 + task = ctx->task;
4223 + is_loaded = state == PFM_CTX_LOADED || state == PFM_CTX_MASKED;
4224 + is_system = ctx->flags.system;
4225 + can_access_pmu = __get_cpu_var(pmu_owner) == task || is_system;
4226 +
4227 + if (is_loaded == 0)
4228 + goto done;
4229 +
4230 + if (is_system == 0) {
4231 + thread = &(task->thread);
4232 +
4233 + /*
4234 + * cannot use debug registers for monitoring if they are
4235 + * already used for debugging
4236 + */
4237 + if (thread->flags & IA64_THREAD_DBG_VALID) {
4238 + PFM_DBG("debug registers already in use for [%d]",
4239 + task->pid);
4240 + return -EBUSY;
4241 + }
4242 + }
4243 +
4244 + /*
4245 + * check for debug registers in system wide mode
4246 + */
4247 + spin_lock(&pfm_arch_sessions_lock);
4248 +
4249 + if (is_system) {
4250 + if (pfm_arch_sessions.pfs_ptrace_use_dbr)
4251 + ret = -EBUSY;
4252 + else
4253 + pfm_arch_sessions.pfs_sys_use_dbr++;
4254 + }
4255 +
4256 + spin_unlock(&pfm_arch_sessions_lock);
4257 +
4258 + if (ret != 0)
4259 + return ret;
4260 +
4261 + /*
4262 + * clear hardware registers to make sure we don't
4263 + * pick up stale state.
4264 + */
4265 + if (can_access_pmu) {
4266 + PFM_DBG("clearing ibrs, dbrs");
4267 + for (i = 0; i < 8; i++) {
4268 + ia64_set_ibr(i, 0);
4269 + ia64_dv_serialize_instruction();
4270 + }
4271 + ia64_srlz_i();
4272 + for (i = 0; i < 8; i++) {
4273 + ia64_set_dbr(i, 0);
4274 + ia64_dv_serialize_data();
4275 + }
4276 + ia64_srlz_d();
4277 + }
4278 +done:
4279 + /*
4280 + * debug registers are now in use
4281 + */
4282 + ctx_arch->flags.use_dbr = 1;
4283 + set->priv_flags |= PFM_ITA_SETFL_USE_DBR;
4284 + PFM_DBG("set%u use_dbr=1", set->id);
4285 + return 0;
4286 +}
4287 +EXPORT_SYMBOL(pfm_ia64_mark_dbregs_used);
4288 +
4289 +/* return the name of the PMU module matching the family of the local CPU */
4290 +char *pfm_arch_get_pmu_module_name(void)
4291 +{
4292 + switch (local_cpu_data->family) {
4293 + case 0x07:
4294 + return "perfmon_itanium";
4295 + case 0x1f:
4296 + return "perfmon_mckinley";
4297 + case 0x20:
4298 + return "perfmon_montecito";
4299 + default: /* any other family: fall back to the generic module */
4300 + return "perfmon_generic";
4301 + }
4302 +}
4303 +
4304 +/*
4305 + * global arch-specific initialization, called only once; 0 or compat init error
4306 + */
4307 +int __init pfm_arch_init(void)
4308 +{
4309 + int ret = 0;
4310 +
4311 + spin_lock_init(&pfm_arch_sessions_lock);
4312 +
4313 +#ifdef CONFIG_IA64_PERFMON_COMPAT
4314 + ret = pfm_ia64_compat_init();
4315 + if (ret)
4316 + return ret;
4317 +#endif
4318 + /* attach perfmon_irqaction to the per-cpu perfmon interrupt vector */
4319 + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
4320 +
4321 + return ret;
4322 +}
4323 diff --git a/arch/ia64/perfmon/perfmon_compat.c b/arch/ia64/perfmon/perfmon_compat.c
4324 new file mode 100644
4325 index 0000000..2fd3d3c
4326 --- /dev/null
4327 +++ b/arch/ia64/perfmon/perfmon_compat.c
4328 @@ -0,0 +1,1210 @@
4329 +/*
4330 + * This file implements the IA-64 specific
4331 + * support for the perfmon2 interface
4332 + *
4333 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
4334 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
4335 + *
4336 + * This program is free software; you can redistribute it and/or
4337 + * modify it under the terms of version 2 of the GNU General Public
4338 + * License as published by the Free Software Foundation.
4339 + *
4340 + * This program is distributed in the hope that it will be useful,
4341 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4342 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4343 + * General Public License for more details.
4344 + *
4345 + * You should have received a copy of the GNU General Public License
4346 + * along with this program; if not, write to the Free Software
4347 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
4348 + * 02111-1307 USA
4349 + */
4350 +#include <linux/interrupt.h>
4351 +#include <linux/module.h>
4352 +#include <linux/file.h>
4353 +#include <linux/fdtable.h>
4354 +#include <linux/seq_file.h>
4355 +#include <linux/vmalloc.h>
4356 +#include <linux/proc_fs.h>
4357 +#include <linux/perfmon_kern.h>
4358 +#include <linux/uaccess.h>
4359 +
4360 +asmlinkage long sys_pfm_stop(int fd);
4361 +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *st);
4362 +asmlinkage long sys_pfm_unload_context(int fd);
4363 +asmlinkage long sys_pfm_restart(int fd);
4364 +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ld);
4365 +
4366 +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what);
4367 +
4368 +extern ssize_t __pfm_read(struct pfm_context *ctx,
4369 + union pfarg_msg *msg_buf,
4370 + int non_block);
4371 +/*
4372 + * function providing some help for backward compatibility with old IA-64
4373 + * applications. In the old model, certain attributes of a counter were
4374 + * passed via the PMC, now they are passed via the PMD.
4375 + */
4376 +static int pfm_compat_update_pmd(struct pfm_context *ctx, u16 set_id, u16 cnum,
4377 + u32 rflags,
4378 + unsigned long *smpl_pmds,
4379 + unsigned long *reset_pmds,
4380 + u64 eventid)
4381 +{
4382 + struct pfm_event_set *set;
4383 + int is_counting;
4384 + unsigned long *impl_pmds;
4385 + u32 flags = 0;
4386 + u16 max_pmd;
4387 +
4388 + impl_pmds = ctx->regs.pmds;
4389 + max_pmd = ctx->regs.max_pmd;
4390 +
4391 + /*
4392 + * given that we do not maintain PMC ->PMD dependencies
4393 + * we cannot figure out what to do in case PMCxx != PMDxx
4394 + */
4395 + if (cnum > max_pmd)
4396 + return 0;
4397 +
4398 + /*
4399 + * assumes PMCxx controls PMDxx which is always true for counters
4400 + * on Itanium PMUs.
4401 + */
4402 + is_counting = pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64;
4403 + set = pfm_find_set(ctx, set_id, 0);
4404 +
4405 + /*
4406 + * for v2.0, we only allowed counting PMD to generate
4407 + * user-level notifications. Same thing with randomization.
4408 + */
4409 + if (is_counting) {
4410 + if (rflags & PFM_REGFL_OVFL_NOTIFY)
4411 + flags |= PFM_REGFL_OVFL_NOTIFY;
4412 + if (rflags & PFM_REGFL_RANDOM)
4413 + flags |= PFM_REGFL_RANDOM;
4414 + /*
4415 + * verify validity of smpl_pmds
4416 + */
4417 + if (unlikely(bitmap_subset(smpl_pmds,
4418 + impl_pmds, max_pmd) == 0)) {
4419 + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u",
4420 + (unsigned long long)smpl_pmds[0], cnum);
4421 + return -EINVAL;
4422 + }
4423 + /*
4424 + * verify validity of reset_pmds
4425 + */
4426 + if (unlikely(bitmap_subset(reset_pmds,
4427 + impl_pmds, max_pmd) == 0)) {
4428 + PFM_DBG("invalid reset_pmds=0x%lx for pmd%u",
4429 + reset_pmds[0], cnum);
4430 + return -EINVAL;
4431 + }
4432 + /*
4433 + * ensures that a PFM_READ_PMDS succeeds with a
4434 + * corresponding PFM_WRITE_PMDS
4435 + */
4436 + __set_bit(cnum, set->used_pmds);
4437 +
4438 + } else if (rflags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
4439 + PFM_DBG("cannot set ovfl_notify or random on pmd%u", cnum);
4440 + return -EINVAL;
4441 + }
4442 +
4443 + set->pmds[cnum].flags = flags;
4444 +
4445 + if (is_counting) {
4446 + bitmap_copy(set->pmds[cnum].reset_pmds,
4447 + reset_pmds,
4448 + max_pmd);
4449 +
4450 + bitmap_copy(set->pmds[cnum].smpl_pmds,
4451 + smpl_pmds,
4452 + max_pmd);
4453 +
4454 + set->pmds[cnum].eventid = eventid;
4455 +
4456 + /*
4457 + * update ovfl_notify
4458 + */
4459 + if (rflags & PFM_REGFL_OVFL_NOTIFY)
4460 + __set_bit(cnum, set->ovfl_notify);
4461 + else
4462 + __clear_bit(cnum, set->ovfl_notify);
4463 +
4464 + }
4465 + PFM_DBG("pmd%u flags=0x%x eventid=0x%lx r_pmds=0x%lx s_pmds=0x%lx",
4466 + cnum, flags,
4467 + eventid,
4468 + reset_pmds[0],
4469 + smpl_pmds[0]);
4470 +
4471 + return 0;
4472 +}
4473 +
4474 +
4475 +int __pfm_write_ibrs_old(struct pfm_context *ctx, void *arg, int count)
4476 +{
4477 + struct pfarg_dbreg *req = arg;
4478 + struct pfarg_pmc pmc;
4479 + int i, ret = 0;
4480 +
4481 + memset(&pmc, 0, sizeof(pmc));
4482 +
4483 + for (i = 0; i < count; i++, req++) {
4484 + pmc.reg_num = 256+req->dbreg_num;
4485 + pmc.reg_value = req->dbreg_value;
4486 + pmc.reg_flags = 0;
4487 + pmc.reg_set = req->dbreg_set;
4488 +
4489 + ret = __pfm_write_pmcs(ctx, &pmc, 1);
4490 +
4491 + req->dbreg_flags &= ~PFM_REG_RETFL_MASK;
4492 + req->dbreg_flags |= pmc.reg_flags;
4493 +
4494 + if (ret)
4495 + return ret;
4496 + }
4497 + return 0;
4498 +}
4499 +
4500 +static long pfm_write_ibrs_old(int fd, void __user *ureq, int count)
4501 +{
4502 + struct pfm_context *ctx;
4503 + struct task_struct *task;
4504 + struct file *filp;
4505 + struct pfarg_dbreg *req = NULL;
4506 + void *fptr, *resume;
4507 + unsigned long flags;
4508 + size_t sz;
4509 + int ret, fput_needed;
4510 +
4511 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4512 + return -EINVAL;
4513 +
4514 + sz = count*sizeof(*req);
4515 +
4516 + filp = fget_light(fd, &fput_needed);
4517 + if (unlikely(filp == NULL)) {
4518 + PFM_DBG("invalid fd %d", fd);
4519 + return -EBADF;
4520 + }
4521 +
4522 + ctx = filp->private_data;
4523 + ret = -EBADF;
4524 +
4525 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4526 + PFM_DBG("fd %d not related to perfmon", fd);
4527 + goto error;
4528 + }
4529 +
4530 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4531 + if (ret)
4532 + goto error;
4533 +
4534 + spin_lock_irqsave(&ctx->lock, flags);
4535 +
4536 + task = ctx->task;
4537 +
4538 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4539 + if (ret == 0)
4540 + ret = __pfm_write_ibrs_old(ctx, req, count);
4541 +
4542 + spin_unlock_irqrestore(&ctx->lock, flags);
4543 +
4544 + if (resume)
4545 + pfm_resume_task(task, resume);
4546 +
4547 + if (copy_to_user(ureq, req, sz))
4548 + ret = -EFAULT;
4549 +
4550 + kfree(fptr);
4551 +error:
4552 + fput_light(filp, fput_needed);
4553 + return ret;
4554 +}
4555 +
4556 +int __pfm_write_dbrs_old(struct pfm_context *ctx, void *arg, int count)
4557 +{
4558 + struct pfarg_dbreg *req = arg;
4559 + struct pfarg_pmc pmc;
4560 + int i, ret = 0;
4561 +
4562 + memset(&pmc, 0, sizeof(pmc));
4563 +
4564 + for (i = 0; i < count; i++, req++) {
4565 + pmc.reg_num = 264+req->dbreg_num;
4566 + pmc.reg_value = req->dbreg_value;
4567 + pmc.reg_flags = 0;
4568 + pmc.reg_set = req->dbreg_set;
4569 +
4570 + ret = __pfm_write_pmcs(ctx, &pmc, 1);
4571 +
4572 + req->dbreg_flags &= ~PFM_REG_RETFL_MASK;
4573 + req->dbreg_flags |= pmc.reg_flags;
4574 + if (ret)
4575 + return ret;
4576 + }
4577 + return 0;
4578 +}
4579 +
4580 +static long pfm_write_dbrs_old(int fd, void __user *ureq, int count)
4581 +{
4582 + struct pfm_context *ctx;
4583 + struct task_struct *task;
4584 + struct file *filp;
4585 + struct pfarg_dbreg *req = NULL;
4586 + void *fptr, *resume;
4587 + unsigned long flags;
4588 + size_t sz;
4589 + int ret, fput_needed;
4590 +
4591 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4592 + return -EINVAL;
4593 +
4594 + sz = count*sizeof(*req);
4595 +
4596 + filp = fget_light(fd, &fput_needed);
4597 + if (unlikely(filp == NULL)) {
4598 + PFM_DBG("invalid fd %d", fd);
4599 + return -EBADF;
4600 + }
4601 +
4602 + ctx = filp->private_data;
4603 + ret = -EBADF;
4604 +
4605 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4606 + PFM_DBG("fd %d not related to perfmon", fd);
4607 + goto error;
4608 + }
4609 +
4610 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4611 + if (ret)
4612 + goto error;
4613 +
4614 + spin_lock_irqsave(&ctx->lock, flags);
4615 +
4616 + task = ctx->task;
4617 +
4618 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4619 + if (ret == 0)
4620 + ret = __pfm_write_dbrs_old(ctx, req, count);
4621 +
4622 + spin_unlock_irqrestore(&ctx->lock, flags);
4623 +
4624 + if (resume)
4625 + pfm_resume_task(task, resume);
4626 +
4627 + if (copy_to_user(ureq, req, sz))
4628 + ret = -EFAULT;
4629 +
4630 + kfree(fptr);
4631 +error:
4632 + fput_light(filp, fput_needed);
4633 + return ret;
4634 +}
4635 +
4636 +int __pfm_write_pmcs_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
4637 + int count)
4638 +{
4639 + struct pfarg_pmc req;
4640 + unsigned int i;
4641 + int ret, error_code;
4642 +
4643 + memset(&req, 0, sizeof(req));
4644 +
4645 + for (i = 0; i < count; i++, req_old++) {
4646 + req.reg_num = req_old->reg_num;
4647 + req.reg_set = req_old->reg_set;
4648 + req.reg_flags = 0;
4649 + req.reg_value = req_old->reg_value;
4650 +
4651 + ret = __pfm_write_pmcs(ctx, (void *)&req, 1);
4652 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4653 + req_old->reg_flags |= req.reg_flags;
4654 +
4655 + if (ret)
4656 + return ret;
4657 +
4658 + ret = pfm_compat_update_pmd(ctx, req_old->reg_set,
4659 + req_old->reg_num,
4660 + (u32)req_old->reg_flags,
4661 + req_old->reg_smpl_pmds,
4662 + req_old->reg_reset_pmds,
4663 + req_old->reg_smpl_eventid);
4664 +
4665 + error_code = ret ? PFM_REG_RETFL_EINVAL : 0;
4666 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4667 + req_old->reg_flags |= error_code;
4668 +
4669 + if (ret)
4670 + return ret;
4671 + }
4672 + return 0;
4673 +}
4674 +
4675 +static long pfm_write_pmcs_old(int fd, void __user *ureq, int count)
4676 +{
4677 + struct pfm_context *ctx;
4678 + struct task_struct *task;
4679 + struct file *filp;
4680 + struct pfarg_reg *req = NULL;
4681 + void *fptr, *resume;
4682 + unsigned long flags;
4683 + size_t sz;
4684 + int ret, fput_needed;
4685 +
4686 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4687 + return -EINVAL;
4688 +
4689 + sz = count*sizeof(*req);
4690 +
4691 + filp = fget_light(fd, &fput_needed);
4692 + if (unlikely(filp == NULL)) {
4693 + PFM_DBG("invalid fd %d", fd);
4694 + return -EBADF;
4695 + }
4696 +
4697 + ctx = filp->private_data;
4698 + ret = -EBADF;
4699 +
4700 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4701 + PFM_DBG("fd %d not related to perfmon", fd);
4702 + goto error;
4703 + }
4704 +
4705 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4706 + if (ret)
4707 + goto error;
4708 +
4709 + spin_lock_irqsave(&ctx->lock, flags);
4710 +
4711 + task = ctx->task;
4712 +
4713 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4714 + if (ret == 0)
4715 + ret = __pfm_write_pmcs_old(ctx, req, count);
4716 +
4717 + spin_unlock_irqrestore(&ctx->lock, flags);
4718 +
4719 + if (resume)
4720 + pfm_resume_task(task, resume);
4721 +
4722 + if (copy_to_user(ureq, req, sz))
4723 + ret = -EFAULT;
4724 +
4725 + kfree(fptr);
4726 +
4727 +error:
4728 + fput_light(filp, fput_needed);
4729 + return ret;
4730 +}
4731 +
4732 +int __pfm_write_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
4733 + int count)
4734 +{
4735 + struct pfarg_pmd req;
4736 + int i, ret;
4737 +
4738 + memset(&req, 0, sizeof(req));
4739 +
4740 + for (i = 0; i < count; i++, req_old++) {
4741 + req.reg_num = req_old->reg_num;
4742 + req.reg_set = req_old->reg_set;
4743 + req.reg_value = req_old->reg_value;
4744 + /* flags passed with pmcs in v2.0 */
4745 +
4746 + req.reg_long_reset = req_old->reg_long_reset;
4747 + req.reg_short_reset = req_old->reg_short_reset;
4748 + req.reg_random_mask = req_old->reg_random_mask;
4749 + /*
4750 + * reg_random_seed is ignored since v2.3
4751 + */
4752 +
4753 + /*
4754 + * skip last_reset_val not used for writing
4755 + * skip smpl_pmds, reset_pmds, eventid, ovfl_swtch_cnt
4756 + * as set in pfm_write_pmcs_old.
4757 + *
4758 + * ovfl_switch_cnt ignored, not implemented in v2.0
4759 + */
4760 + ret = __pfm_write_pmds(ctx, (void *)&req, 1, 1);
4761 +
4762 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4763 + req_old->reg_flags |= req.reg_flags;
4764 +
4765 + if (ret)
4766 + return ret;
4767 + }
4768 + return 0;
4769 +}
4770 +
4771 +static long pfm_write_pmds_old(int fd, void __user *ureq, int count)
4772 +{
4773 + struct pfm_context *ctx;
4774 + struct task_struct *task;
4775 + struct file *filp;
4776 + struct pfarg_reg *req = NULL;
4777 + void *fptr, *resume;
4778 + unsigned long flags;
4779 + size_t sz;
4780 + int ret, fput_needed;
4781 +
4782 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4783 + return -EINVAL;
4784 +
4785 + sz = count*sizeof(*req);
4786 +
4787 + filp = fget_light(fd, &fput_needed);
4788 + if (unlikely(filp == NULL)) {
4789 + PFM_DBG("invalid fd %d", fd);
4790 + return -EBADF;
4791 + }
4792 +
4793 + ctx = filp->private_data;
4794 + ret = -EBADF;
4795 +
4796 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4797 + PFM_DBG("fd %d not related to perfmon", fd);
4798 + goto error;
4799 + }
4800 +
4801 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4802 + if (ret)
4803 + goto error;
4804 +
4805 + spin_lock_irqsave(&ctx->lock, flags);
4806 +
4807 + task = ctx->task;
4808 +
4809 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4810 + if (ret == 0)
4811 + ret = __pfm_write_pmds_old(ctx, req, count);
4812 +
4813 + spin_unlock_irqrestore(&ctx->lock, flags);
4814 +
4815 + if (copy_to_user(ureq, req, sz))
4816 + ret = -EFAULT;
4817 +
4818 + if (resume)
4819 + pfm_resume_task(task, resume);
4820 +
4821 + kfree(fptr);
4822 +error:
4823 + fput_light(filp, fput_needed);
4824 + return ret;
4825 +}
4826 +
4827 +int __pfm_read_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
4828 + int count)
4829 +{
4830 + struct pfarg_pmd req;
4831 + int i, ret;
4832 +
4833 + memset(&req, 0, sizeof(req));
4834 +
4835 + for (i = 0; i < count; i++, req_old++) {
4836 + req.reg_num = req_old->reg_num;
4837 + req.reg_set = req_old->reg_set;
4838 +
4839 + /* skip value not used for reading */
4840 + req.reg_flags = req_old->reg_flags;
4841 +
4842 + /* skip short/long_reset not used for reading */
4843 + /* skip last_reset_val not used for reading */
4844 + /* skip ovfl_switch_cnt not used for reading */
4845 +
4846 + ret = __pfm_read_pmds(ctx, (void *)&req, 1);
4847 +
4848 + req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
4849 + req_old->reg_flags |= req.reg_flags;
4850 + if (ret)
4851 + return ret;
4852 +
4853 + /* update fields */
4854 + req_old->reg_value = req.reg_value;
4855 +
4856 + req_old->reg_last_reset_val = req.reg_last_reset_val;
4857 + req_old->reg_ovfl_switch_cnt = req.reg_ovfl_switch_cnt;
4858 + }
4859 + return 0;
4860 +}
4861 +
4862 +static long pfm_read_pmds_old(int fd, void __user *ureq, int count)
4863 +{
4864 + struct pfm_context *ctx;
4865 + struct task_struct *task;
4866 + struct file *filp;
4867 + struct pfarg_reg *req = NULL;
4868 + void *fptr, *resume;
4869 + unsigned long flags;
4870 + size_t sz;
4871 + int ret, fput_needed;
4872 +
4873 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4874 + return -EINVAL;
4875 +
4876 + sz = count*sizeof(*req);
4877 +
4878 + filp = fget_light(fd, &fput_needed);
4879 + if (unlikely(filp == NULL)) {
4880 + PFM_DBG("invalid fd %d", fd);
4881 + return -EBADF;
4882 + }
4883 +
4884 + ctx = filp->private_data;
4885 + ret = -EBADF;
4886 +
4887 + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
4888 + PFM_DBG("fd %d not related to perfmon", fd);
4889 + goto error;
4890 + }
4891 +
4892 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4893 + if (ret)
4894 + goto error;
4895 +
4896 + spin_lock_irqsave(&ctx->lock, flags);
4897 +
4898 + task = ctx->task;
4899 +
4900 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
4901 + if (ret == 0)
4902 + ret = __pfm_read_pmds_old(ctx, req, count);
4903 +
4904 + spin_unlock_irqrestore(&ctx->lock, flags);
4905 +
4906 + if (resume)
4907 + pfm_resume_task(task, resume);
4908 +
4909 + if (copy_to_user(ureq, req, sz))
4910 + ret = -EFAULT;
4911 +
4912 + kfree(fptr);
4913 +error:
4914 + fput_light(filp, fput_needed);
4915 + return ret;
4916 +}
4917 +
4918 +/*
4919 + * OBSOLETE: use /proc/perfmon_map instead
4920 + * reads count pfarg_reg from ureq, fills in each PMC default value, copies back
4921 + */
4922 +static long pfm_get_default_pmcs_old(int fd, void __user *ureq, int count)
4923 +{
4924 + struct pfarg_reg *req = NULL;
4925 + void *fptr;
4926 + size_t sz;
4927 + int ret, i;
4928 + unsigned int cnum;
4929 +
4930 + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
4931 + return -EINVAL;
4932 +
4933 + /*
4934 + * ensure the pfm_pmu_conf does not disappear while
4935 + * we use it
4936 + */
4937 + ret = pfm_pmu_conf_get(1);
4938 + if (ret)
4939 + return ret;
4940 +
4941 + sz = count*sizeof(*req);
4942 +
4943 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
4944 + if (ret)
4945 + goto error;
4946 +
4947 + for (i = 0; i < count; i++) {
4948 + cnum = req[i].reg_num;
4949 + /* cnum is user supplied: range-check it before indexing pmc_desc[] */
4950 + if (cnum >= PFM_MAX_PMCS ||
4951 + (pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_I) == 0) {
4952 + req[i].reg_flags = PFM_REG_RETFL_EINVAL;
4953 + break;
4954 + }
4955 + req[i].reg_value = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
4956 + req[i].reg_flags = 0;
4957 +
4958 + PFM_DBG("pmc[%u]=0x%lx", cnum, req[i].reg_value);
4959 + }
4960 +
4961 + if (copy_to_user(ureq, req, sz))
4962 + ret = -EFAULT;
4963 +
4964 + kfree(fptr);
4965 +error:
4966 + pfm_pmu_conf_put();
4967 +
4968 + return ret;
4969 +}
4970 +
4971 +/*
4972 + * allocate a sampling buffer and remap it into the user address space of
4973 + * the task. This is only used in compatibility mode
4974 + *
4975 + * called ONLY on current task; returns 0, pfm_smpl_buf_alloc() error or -ENOMEM
4976 + */
4977 +int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, size_t rsize,
4978 + struct file *filp)
4979 +{
4980 + struct mm_struct *mm = current->mm;
4981 + struct vm_area_struct *vma = NULL;
4982 + struct pfm_arch_context *ctx_arch;
4983 + size_t size;
4984 + int ret;
4985 + extern struct vm_operations_struct pfm_buf_map_vm_ops;
4986 +
4987 + ctx_arch = pfm_ctx_arch(ctx);
4988 +
4989 + /*
4990 + * allocate buffer + map desc
4991 + */
4992 + ret = pfm_smpl_buf_alloc(ctx, rsize);
4993 + if (ret)
4994 + return ret;
4995 +
4996 + size = ctx->smpl_size;
4997 +
4998 + /* allocate vma */
4999 + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
5000 + if (!vma) {
5001 + PFM_DBG("Cannot allocate vma");
5002 + goto error_kmem;
5003 + }
5004 + memset(vma, 0, sizeof(*vma));
5005 +
5006 + /*
5007 + * partially initialize the vma for the sampling buffer
5008 + */
5009 + vma->vm_mm = mm;
5010 + vma->vm_flags = VM_READ | VM_MAYREAD | VM_RESERVED;
5011 + vma->vm_page_prot = PAGE_READONLY;
5012 + vma->vm_ops = &pfm_buf_map_vm_ops;
5013 + vma->vm_file = filp;
5014 + vma->vm_private_data = ctx;
5015 + vma->vm_pgoff = 0;
5016 +
5017 + /*
5018 + * simulate effect of mmap(); NOTE(review): no fput() on the error path below
5019 + */
5020 + get_file(filp);
5021 +
5022 + /*
5023 + * Let's do the difficult operations next.
5024 + *
5025 + * now we atomically find some area in the address space and
5026 + * remap the buffer into it.
5027 + */
5028 + down_write(&current->mm->mmap_sem);
5029 +
5030 + /* find some free area in address space, must have mmap sem held */
5031 + vma->vm_start = get_unmapped_area(NULL, 0, size, 0,
5032 + MAP_PRIVATE|MAP_ANONYMOUS);
5033 + /* failure comes back as a negative errno (never page aligned), not as 0 */
5034 + if (vma->vm_start & ~PAGE_MASK) {
5035 + PFM_DBG("cannot find unmapped area of size %zu", size);
5036 + up_write(&current->mm->mmap_sem);
5037 + goto error;
5038 + }
5039 + vma->vm_end = vma->vm_start + size;
5040 +
5041 + PFM_DBG("aligned_size=%zu mapped @0x%lx", size, vma->vm_start);
5042 + /*
5043 + * now insert the vma in the vm list for the process, must be
5044 + * done with mmap lock held
5045 + */
5046 + insert_vm_struct(mm, vma);
5047 +
5048 + mm->total_vm += size >> PAGE_SHIFT;
5049 +
5050 + up_write(&current->mm->mmap_sem);
5051 +
5052 + /*
5053 + * IMPORTANT: we do not issue the fput()
5054 + * because we want to increase the ref count
5055 + * on the descriptor to simulate what mmap()
5056 + * would do
5057 + */
5058 +
5059 + /*
5060 + * used to propagate vaddr to syscall stub
5061 + */
5062 + ctx_arch->ctx_smpl_vaddr = (void *)vma->vm_start;
5063 +
5064 + return 0;
5065 +error:
5066 + kmem_cache_free(vm_area_cachep, vma);
5067 +error_kmem:
5068 + pfm_smpl_buf_space_release(ctx, ctx->smpl_size);
5069 + vfree(ctx->smpl_addr);
5070 + return -ENOMEM;
5071 +}
5072 +
5073 +#define PFM_DEFAULT_SMPL_UUID { \
5074 + 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
5075 + 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
5076 +
5077 +static pfm_uuid_t old_default_uuid = PFM_DEFAULT_SMPL_UUID;
5078 +static pfm_uuid_t null_uuid;
5079 +
5080 +/*
5081 + * function invoked in case pfm_context_create fails
5082 + * at the last operation, copy_to_user. It needs to
5083 + * undo memory allocations and free the file descriptor
5084 + */
5085 +static void pfm_undo_create_context_fd(int fd, struct pfm_context *ctx)
5086 +{
5087 + struct files_struct *files = current->files;
5088 + struct file *file;
5089 + int fput_needed;
5090 +
5091 + file = fget_light(fd, &fput_needed);
5092 + /*
5093 + * there is no fd_uninstall(), so we do it
5094 + * here. put_unused_fd() does not remove the
5095 + * effect of fd_install().
5096 + */
5097 +
5098 + spin_lock(&files->file_lock);
5099 + files->fd_array[fd] = NULL;
5100 + spin_unlock(&files->file_lock);
5101 +
5102 + fput_light(file, fput_needed);
5103 +
5104 + /*
5105 + * decrement ref count and kill file
5106 + */
5107 + put_filp(file);
5108 +
5109 + put_unused_fd(fd);
5110 +
5111 + pfm_free_context(ctx);
5112 +}
5113 +
5114 +static int pfm_get_smpl_arg_old(pfm_uuid_t uuid, void __user *fmt_uarg,
5115 + size_t usize, void **arg,
5116 + struct pfm_smpl_fmt **fmt)
5117 +{
5118 + struct pfm_smpl_fmt *f;
5119 + void *addr = NULL;
5120 + size_t sz;
5121 + int ret;
5122 +
5123 + if (!memcmp(uuid, null_uuid, sizeof(pfm_uuid_t)))
5124 + return 0;
5125 +
5126 + if (memcmp(uuid, old_default_uuid, sizeof(pfm_uuid_t))) {
5127 + PFM_DBG("compatibility mode supports only default sampling format");
5128 + return -EINVAL;
5129 + }
5130 + /*
5131 + * find fmt and increase refcount
5132 + */
5133 + f = pfm_smpl_fmt_get("default-old");
5134 + if (f == NULL) {
5135 + PFM_DBG("default-old buffer format not found");
5136 + return -EINVAL;
5137 + }
5138 +
5139 + /*
5140 + * expected format argument size
5141 + */
5142 + sz = f->fmt_arg_size;
5143 +
5144 + /*
5145 + * check user size matches expected size
5146 + * usize = -1 is for IA-64 backward compatibility
5147 + */
5148 + ret = -EINVAL;
5149 + if (sz != usize && usize != -1) {
5150 + PFM_DBG("invalid arg size %zu, format expects %zu",
5151 + usize, sz);
5152 + goto error;
5153 + }
5154 +
5155 + ret = -ENOMEM;
5156 + addr = kmalloc(sz, GFP_KERNEL);
5157 + if (addr == NULL)
5158 + goto error;
5159 +
5160 + ret = -EFAULT;
5161 + if (copy_from_user(addr, fmt_uarg, sz))
5162 + goto error;
5163 +
5164 + *arg = addr;
5165 + *fmt = f;
5166 + return 0;
5167 +
5168 +error:
5169 + kfree(addr);
5170 + pfm_smpl_fmt_put(f);
5171 + return ret;
5172 +}
5173 +
5174 +static long pfm_create_context_old(int fd, void __user *ureq, int count)
5175 +{
5176 + struct pfm_context *new_ctx;
5177 + struct pfm_arch_context *ctx_arch;
5178 + struct pfm_smpl_fmt *fmt = NULL;
5179 + struct pfarg_context req_old;
5180 + void __user *usmpl_arg;
5181 + void *smpl_arg = NULL;
5182 + struct pfarg_ctx req;
5183 + int ret;
5184 +
5185 + if (count != 1)
5186 + return -EINVAL;
5187 +
5188 + if (copy_from_user(&req_old, ureq, sizeof(req_old)))
5189 + return -EFAULT;
5190 +
5191 + memset(&req, 0, sizeof(req));
5192 +
5193 + /*
5194 + * sampling format args are following pfarg_context
5195 + */
5196 + usmpl_arg = ureq+sizeof(req_old);
5197 +
5198 + ret = pfm_get_smpl_arg_old(req_old.ctx_smpl_buf_id, usmpl_arg, -1,
5199 + &smpl_arg, &fmt);
5200 + if (ret)
5201 + return ret;
5202 +
5203 + req.ctx_flags = req_old.ctx_flags;
5204 +
5205 + /*
5206 + * returns file descriptor if >=0, or error code */
5207 + ret = __pfm_create_context(&req, fmt, smpl_arg, PFM_COMPAT, &new_ctx);
5208 + if (ret >= 0) {
5209 + ctx_arch = pfm_ctx_arch(new_ctx);
5210 + req_old.ctx_fd = ret;
5211 + req_old.ctx_smpl_vaddr = ctx_arch->ctx_smpl_vaddr;
5212 + }
5213 +
5214 + if (copy_to_user(ureq, &req_old, sizeof(req_old))) {
5215 + pfm_undo_create_context_fd(req_old.ctx_fd, new_ctx);
5216 + ret = -EFAULT;
5217 + }
5218 +
5219 + kfree(smpl_arg);
5220 +
5221 + return ret;
5222 +}
5223 +
5224 +/*
5225 + * obsolete call (use /proc/perfmon instead): only reports PFM_VERSION
5226 + */
5227 +static long pfm_get_features_old(int fd, void __user *arg, int count)
5228 +{
5229 + struct pfarg_features features;
5230 +
5231 + /* exactly one pfarg_features structure must be requested */
5232 + if (count != 1)
5233 + return -EINVAL;
5234 +
5235 + /* zero everything so that only the version field carries a value */
5236 + memset(&features, 0, sizeof(features));
5237 + features.ft_version = PFM_VERSION;
5238 +
5239 + if (copy_to_user(arg, &features, sizeof(features)))
5240 + return -EFAULT;
5241 +
5242 + return 0;
5243 +}
5244 +
5245 +static long pfm_debug_old(int fd, void __user *arg, int count)
5246 +{
5247 + int m;
5248 +
5249 + if (count != 1)
5250 + return -EINVAL;
5251 +
5252 + if (get_user(m, (int __user *)arg)) /* arg is read as a single user int */
5253 + return -EFAULT;
5254 + /* legacy debug switch: the user value is collapsed to a 0/1 flag */
5255 +
5256 + pfm_controls.debug = m == 0 ? 0 : 1;
5257 +
5258 + PFM_INFO("debugging %s (timing reset)",
5259 + pfm_controls.debug ? "on" : "off");
5260 + /* switching debug off also zeroes pfm_stats on every online CPU; m is reused as the CPU index */
5261 + if (m == 0)
5262 + for_each_online_cpu(m) {
5263 + memset(&per_cpu(pfm_stats, m), 0,
5264 + sizeof(struct pfm_stats));
5265 + }
5266 + return 0;
5267 +}
5268 +
5269 +static long pfm_unload_context_old(int fd, void __user *arg, int count)
5270 +{
5271 + /*
5272 + * legacy unload carries no argument vector: any non-zero count is invalid
5273 + */
5274 + return count ? -EINVAL : sys_pfm_unload_context(fd);
5275 +}
5276 +
5277 +static long pfm_restart_old(int fd, void __user *arg, int count)
5278 +{
5279 + /*
5280 + * legacy restart carries no argument vector: any non-zero count is invalid
5281 + */
5282 + return count ? -EINVAL : sys_pfm_restart(fd);
5283 +}
5284 +
5285 +static long pfm_stop_old(int fd, void __user *arg, int count)
5286 +{
5287 + /*
5288 + * legacy stop carries no argument vector: any non-zero count is invalid
5289 + */
5290 + return count ? -EINVAL : sys_pfm_stop(fd);
5291 +}
5292 +
5293 +static long pfm_start_old(int fd, void __user *arg, int count)
5294 +{
5295 + /*
5296 + * legacy start: more than one argument structure is rejected
5297 + */
5298 + return count > 1 ? -EINVAL : sys_pfm_start(fd, arg);
5299 +}
5300 +
5301 +static long pfm_load_context_old(int fd, void __user *ureq, int count)
5302 +{
5303 + /*
5304 + * legacy load: exactly one argument structure must be supplied
5305 + */
5306 + return count == 1 ? sys_pfm_load_context(fd, ureq) : -EINVAL;
5307 +}
5308 +
5309 +/*
5310 + * perfmon command description: one handler per legacy perfmonctl command
5311 + */
5312 +struct pfm_cmd_desc {
5313 + long (*cmd_func)(int fd, void __user *arg, int count); /* NULL when the command is not implemented */
5314 +};
5315 +
5316 +/*
5317 + * functions MUST be listed in the increasing order of
5318 + * their index (see perfmon.h)
5319 + */
5320 +#define PFM_CMD(name) \
5321 + { .cmd_func = name, \
5322 + } /* table entry that dispatches to handler 'name' */
5323 +#define PFM_CMD_NONE \
5324 + { .cmd_func = NULL \
5325 + } /* empty slot: sys_perfmonctl() rejects it with -EINVAL */
5326 +
5327 +static struct pfm_cmd_desc pfm_cmd_tab[] = { /* indexed by the cmd argument of sys_perfmonctl() */
5328 +/* 0 */PFM_CMD_NONE,
5329 +/* 1 */PFM_CMD(pfm_write_pmcs_old),
5330 +/* 2 */PFM_CMD(pfm_write_pmds_old),
5331 +/* 3 */PFM_CMD(pfm_read_pmds_old),
5332 +/* 4 */PFM_CMD(pfm_stop_old),
5333 +/* 5 */PFM_CMD(pfm_start_old),
5334 +/* 6 */PFM_CMD_NONE,
5335 +/* 7 */PFM_CMD_NONE,
5336 +/* 8 */PFM_CMD(pfm_create_context_old),
5337 +/* 9 */PFM_CMD_NONE,
5338 +/* 10 */PFM_CMD(pfm_restart_old),
5339 +/* 11 */PFM_CMD_NONE,
5340 +/* 12 */PFM_CMD(pfm_get_features_old),
5341 +/* 13 */PFM_CMD(pfm_debug_old),
5342 +/* 14 */PFM_CMD_NONE,
5343 +/* 15 */PFM_CMD(pfm_get_default_pmcs_old),
5344 +/* 16 */PFM_CMD(pfm_load_context_old),
5345 +/* 17 */PFM_CMD(pfm_unload_context_old),
5346 +/* 18 */PFM_CMD_NONE,
5347 +/* 19 */PFM_CMD_NONE,
5348 +/* 20 */PFM_CMD_NONE,
5349 +/* 21 */PFM_CMD_NONE,
5350 +/* 22 */PFM_CMD_NONE,
5351 +/* 23 */PFM_CMD_NONE,
5352 +/* 24 */PFM_CMD_NONE,
5353 +/* 25 */PFM_CMD_NONE,
5354 +/* 26 */PFM_CMD_NONE,
5355 +/* 27 */PFM_CMD_NONE,
5356 +/* 28 */PFM_CMD_NONE,
5357 +/* 29 */PFM_CMD_NONE,
5358 +/* 30 */PFM_CMD_NONE,
5359 +/* 31 */PFM_CMD_NONE,
5360 +/* 32 */PFM_CMD(pfm_write_ibrs_old),
5361 +/* 33 */PFM_CMD(pfm_write_dbrs_old),
5362 +};
5363 +#define PFM_CMD_COUNT ARRAY_SIZE(pfm_cmd_tab) /* number of slots, i.e. highest command number + 1 */
5364 +
5365 +/*
5366 + * legacy perfmonctl() system-call entry point (must return long)
5367 + */
5368 +asmlinkage long sys_perfmonctl(int fd, int cmd, void __user *arg, int count)
5369 +{
5370 + if (perfmon_disabled)
5371 + return -ENOSYS;
5372 + /* the range test comes first so pfm_cmd_tab is never indexed out of bounds */
5373 + if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT
5374 + || pfm_cmd_tab[cmd].cmd_func == NULL)) {
5375 + PFM_DBG("invalid cmd=%d", cmd);
5376 + return -EINVAL;
5377 + }
5378 + return (long)pfm_cmd_tab[cmd].cmd_func(fd, arg, count); /* handler validates count itself */
5379 +}
5380 +
5381 +/*
5382 + * Called from pfm_read() for a perfmon v2.0 context.
5383 + *
5384 + * compatibility mode pfm_read() routine. We need a separate
5385 + * routine because the definition of the message has changed.
5386 + * The pfm_msg and pfarg_msg structures are different.
5387 + *
5388 + * return: sizeof(pfm_msg_t) on success, -errno otherwise
5389 + */
5390 +ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
5391 + char __user *buf,
5392 + int non_block,
5393 + size_t size)
5394 +{
5395 + union pfarg_msg msg_buf;
5396 + pfm_msg_t old_msg_buf;
5397 + pfm_ovfl_msg_t *o_msg;
5398 + struct pfarg_ovfl_msg *n_msg;
5399 + int ret;
5400 +
5401 + PFM_DBG("msg=%p size=%zu", buf, size);
5402 +
5403 + /*
5404 + * cannot extract partial messages.
5405 + * check even when there is no message
5406 + *
5407 + * cannot extract more than one message per call. Bytes
5408 + * above sizeof(msg) are ignored.
5409 + */
5410 + if (size < sizeof(old_msg_buf)) {
5411 + PFM_DBG("message is too small size=%zu must be >=%zu)",
5412 + size,
5413 + sizeof(old_msg_buf));
5414 + return -EINVAL;
5415 + }
5416 +
5417 + ret = __pfm_read(ctx, &msg_buf, non_block);
5418 + if (ret < 1) /* error or nothing read: pass the value through unchanged */
5419 + return ret;
5420 +
5421 + /*
5422 + * force return value to old message size; clear the reply first so
5423 + * fields not set below cannot leak kernel stack to user space */
5424 + ret = sizeof(old_msg_buf);
5425 + memset(&old_msg_buf, 0, sizeof(old_msg_buf));
5426 + o_msg = &old_msg_buf.pfm_ovfl_msg;
5427 + n_msg = &msg_buf.pfm_ovfl_msg;
5428 +
5429 + switch (msg_buf.type) {
5430 + case PFM_MSG_OVFL:
5431 + o_msg->msg_type = PFM_MSG_OVFL;
5432 + o_msg->msg_ctx_fd = 0;
5433 + o_msg->msg_active_set = n_msg->msg_active_set;
5434 + o_msg->msg_tstamp = 0;
5435 +
5436 + o_msg->msg_ovfl_pmds[0] = n_msg->msg_ovfl_pmds[0];
5437 + o_msg->msg_ovfl_pmds[1] = n_msg->msg_ovfl_pmds[1];
5438 + o_msg->msg_ovfl_pmds[2] = n_msg->msg_ovfl_pmds[2];
5439 + o_msg->msg_ovfl_pmds[3] = n_msg->msg_ovfl_pmds[3];
5440 + break;
5441 + case PFM_MSG_END:
5442 + o_msg->msg_type = PFM_MSG_END;
5443 + o_msg->msg_ctx_fd = 0;
5444 + o_msg->msg_tstamp = 0;
5445 + break;
5446 + default:
5447 + PFM_DBG("unknown msg type=%d", msg_buf.type); /* reply stays all-zero */
5448 + }
5449 + if (copy_to_user(buf, &old_msg_buf, sizeof(old_msg_buf)))
5450 + ret = -EFAULT;
5451 + PFM_DBG_ovfl("ret=%d", ret);
5452 + return ret;
5453 +}
5454 +
5455 +/*
5456 + * legacy /proc/perfmon simplified interface: we only maintain the
5457 + * global information (no more per-cpu stats; for those use
5458 + * /sys/devices/system/cpu/cpuXX/perfmon instead)
5459 + */
5460 +static struct proc_dir_entry *perfmon_proc; /* assigned in pfm_ia64_compat_init() */
5461 +
5462 +static void *pfm_proc_start(struct seq_file *m, loff_t *pos)
5463 +{
5464 + /*
5465 + * one-record sequence: only position 0 yields a (dummy) non-NULL token
5466 + */
5467 + return *pos == 0 ? (void *)1 : NULL;
5468 +}
5469 +
5470 +static void *pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
5471 +{
5472 + ++*pos; /* advance, then reuse the start logic (non-NULL only at position 0) */
5473 + return pfm_proc_start(m, pos);
5474 +}
5475 +
5476 +static void pfm_proc_stop(struct seq_file *m, void *v)
5477 +{ /* intentionally empty: pfm_proc_start()/pfm_proc_next() acquire nothing */
5478 +}
5479 +
5480 +/*
5481 + * this is a simplified version of the legacy /proc/perfmon.
5482 + * We have retained ONLY the key information that tools are actually
5483 + * using: the perfmon version and the model string
5484 + */
5485 +static void pfm_proc_show_header(struct seq_file *m)
5486 +{
5487 + char buf[128];
5488 +
5489 + pfm_sysfs_res_show(buf, sizeof(buf), 3); /* NOTE(review): selector 3 presumably fills buf with the PMU model -- confirm */
5490 +
5491 + seq_printf(m, "perfmon version : %u.%u\n",
5492 + PFM_VERSION_MAJ, PFM_VERSION_MIN);
5493 +
5494 + seq_printf(m, "model : %s", buf); /* no newline added here; presumably buf already ends with one -- confirm */
5495 +}
5496 +
5497 +static int pfm_proc_show(struct seq_file *m, void *v)
5498 +{
5499 + pfm_proc_show_header(m); /* the header is the whole content of the file */
5500 + return 0;
5501 +}
5502 +
5503 +struct seq_operations pfm_proc_seq_ops = { /* seq_file iterator handed to seq_open() by pfm_proc_open() */
5504 + .start = pfm_proc_start,
5505 + .next = pfm_proc_next,
5506 + .stop = pfm_proc_stop,
5507 + .show = pfm_proc_show
5508 +};
5509 +
5510 +static int pfm_proc_open(struct inode *inode, struct file *file)
5511 +{
5512 + return seq_open(file, &pfm_proc_seq_ops); /* attach the one-record iterator to this open file */
5513 +}
5514 +
5515 +
5516 +static struct file_operations pfm_proc_fops = { /* installed on the /proc entry by pfm_ia64_compat_init() */
5517 + .open = pfm_proc_open,
5518 + .read = seq_read,
5519 + .llseek = seq_lseek,
5520 + .release = seq_release,
5521 +};
5522 +
5523 +/*
5524 + * called from pfm_arch_init(), global initialization, called once
5525 + */
5526 +int __init pfm_ia64_compat_init(void)
5527 +{
5528 + /*
5529 + * create /proc/perfmon (world-readable, S_IRUGO)
5530 + */
5531 + perfmon_proc = create_proc_entry("perfmon", S_IRUGO, NULL);
5532 + if (perfmon_proc == NULL) {
5533 + PFM_ERR("cannot create /proc entry, perfmon disabled");
5534 + return -1; /* plain -1, not an errno value */
5535 + }
5536 + perfmon_proc->proc_fops = &pfm_proc_fops;
5537 + return 0;
5538 +}
5539 diff --git a/arch/ia64/perfmon/perfmon_default_smpl.c b/arch/ia64/perfmon/perfmon_default_smpl.c
5540 new file mode 100644
5541 index 0000000..b408a13
5542 --- /dev/null
5543 +++ b/arch/ia64/perfmon/perfmon_default_smpl.c
5544 @@ -0,0 +1,273 @@
5545 +/*
5546 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
5547 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
5548 + *
5549 + * This file implements the old default sampling buffer format
5550 + * for the Linux/ia64 perfmon-2 subsystem. This is for backward
5551 + * compatibility only. use the new default format in perfmon/
5552 + *
5553 + * This program is free software; you can redistribute it and/or
5554 + * modify it under the terms of version 2 of the GNU General Public
5555 + * License as published by the Free Software Foundation.
5556 + *
5557 + * This program is distributed in the hope that it will be useful,
5558 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5559 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5560 + * General Public License for more details.
5561 + *
5562 + * You should have received a copy of the GNU General Public License
5563 + * along with this program; if not, write to the Free Software
5564 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
5565 + * 02111-1307 USA
5566 + */
5567 +#include <linux/kernel.h>
5568 +#include <linux/types.h>
5569 +#include <linux/module.h>
5570 +#include <linux/init.h>
5571 +#include <linux/delay.h>
5572 +#include <linux/smp.h>
5573 +#include <linux/sysctl.h>
5574 +
5575 +#ifdef MODULE
5576 +#define FMT_FLAGS 0 /* modular build: no format flag set */
5577 +#else
5578 +#define FMT_FLAGS PFM_FMTFL_IS_BUILTIN /* non-modular build */
5579 +#endif /* MODULE */
5580 +
5581 +#include <linux/perfmon_kern.h>
5582 +#include <asm/perfmon_default_smpl.h>
5583 +
5584 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
5585 +MODULE_DESCRIPTION("perfmon old default sampling format");
5586 +MODULE_LICENSE("GPL");
5587 +
5588 +static int pfm_default_fmt_validate(u32 flags, u16 npmds, void *data)
5589 +{
5590 + struct pfm_default_smpl_arg *arg = data;
5591 + size_t min_buf_size;
5592 +
5593 + if (data == NULL) { /* the format argument is mandatory */
5594 + PFM_DBG("no argument passed");
5595 + return -EINVAL;
5596 + }
5597 +
5598 + /*
5599 + * compute min buf size. All PMD are manipulated as 64bit entities
5600 + */
5601 + min_buf_size = sizeof(struct pfm_default_smpl_hdr)
5602 + + (sizeof(struct pfm_default_smpl_entry) + (npmds*sizeof(u64)));
5603 +
5604 + PFM_DBG("validate flags=0x%x npmds=%u min_buf_size=%lu "
5605 + "buf_size=%lu CPU%d", flags, npmds, min_buf_size,
5606 + arg->buf_size, smp_processor_id());
5607 +
5608 + /*
5609 + * must hold at least the buffer header + one entry with npmds values
5610 + */
5611 + if (arg->buf_size < min_buf_size)
5612 + return -EINVAL;
5613 +
5614 + return 0;
5615 +}
5616 +
5617 +static int pfm_default_fmt_get_size(unsigned int flags, void *data,
5618 + size_t *size)
5619 +{
5620 + struct pfm_default_smpl_arg *arg = data;
5621 +
5622 + /*
5623 + * size is expected to have been checked by pfm_default_fmt_validate()
5624 + */
5625 + *size = arg->buf_size; /* buffer size is exactly what the caller asked for */
5626 +
5627 + return 0;
5628 +}
5629 +
5630 +static int pfm_default_fmt_init(struct pfm_context *ctx, void *buf,
5631 + u32 flags, u16 npmds, void *data)
5632 +{
5633 + struct pfm_default_smpl_hdr *hdr;
5634 + struct pfm_default_smpl_arg *arg = data;
5635 +
5636 + hdr = buf; /* the header lives at the very start of the sampling buffer */
5637 +
5638 + hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
5639 + hdr->hdr_buf_size = arg->buf_size;
5640 + hdr->hdr_cur_offs = sizeof(*hdr); /* first entry goes right after the header */
5641 + hdr->hdr_overflows = 0;
5642 + hdr->hdr_count = 0;
5643 +
5644 + PFM_DBG("buffer=%p buf_size=%lu hdr_size=%lu "
5645 + "hdr_version=%u cur_offs=%lu",
5646 + buf,
5647 + hdr->hdr_buf_size,
5648 + sizeof(*hdr),
5649 + hdr->hdr_version,
5650 + hdr->hdr_cur_offs);
5651 +
5652 + return 0;
5653 +}
5654 +
5655 +static int pfm_default_fmt_handler(struct pfm_context *ctx,
5656 + unsigned long ip, u64 tstamp, void *data)
5657 +{
5658 + struct pfm_default_smpl_hdr *hdr;
5659 + struct pfm_default_smpl_entry *ent;
5660 + void *cur, *last, *buf;
5661 + u64 *e;
5662 + size_t entry_size;
5663 + u16 npmds, i, ovfl_pmd;
5664 + struct pfm_ovfl_arg *arg;
5665 + /* appends one sample to the buffer; returns -ENOBUFS once it is full */
5666 + hdr = ctx->smpl_addr;
5667 + arg = &ctx->ovfl_arg;
5668 +
5669 + buf = hdr;
5670 + cur = buf+hdr->hdr_cur_offs; /* where the next entry is written */
5671 + last = buf+hdr->hdr_buf_size; /* one past the end of the buffer */
5672 + ovfl_pmd = arg->ovfl_pmd;
5673 +
5674 + /*
5675 + * precheck for sanity
5676 + */
5677 + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE)
5678 + goto full;
5679 +
5680 + npmds = arg->num_smpl_pmds;
5681 +
5682 + ent = cur;
5683 +
5684 + prefetch(arg->smpl_pmds_values);
5685 +
5686 + entry_size = sizeof(*ent) + (npmds << 3); /* fixed part + 8 bytes per sampled PMD */
5687 +
5688 + /* position for first pmd */
5689 + e = (unsigned long *)(ent+1);
5690 +
5691 + hdr->hdr_count++;
5692 +
5693 + PFM_DBG_ovfl("count=%lu cur=%p last=%p free_bytes=%lu "
5694 + "ovfl_pmd=%d npmds=%u",
5695 + hdr->hdr_count,
5696 + cur, last,
5697 + last-cur,
5698 + ovfl_pmd,
5699 + npmds);
5700 +
5701 + /*
5702 + * current = task running at the time of the overflow.
5703 + *
5704 + * per-task mode:
5705 + * - this is usually the task being monitored.
5706 + * Under certain conditions, it might be a different task
5707 + *
5708 + * system-wide:
5709 + * - this is not necessarily the task controlling the session
5710 + */
5711 + ent->pid = current->pid;
5712 + ent->ovfl_pmd = ovfl_pmd;
5713 + ent->last_reset_val = arg->pmd_last_reset;
5714 +
5715 + /*
5716 + * where did the fault happen (includes slot number)
5717 + */
5718 + ent->ip = ip;
5719 +
5720 + ent->tstamp = tstamp;
5721 + ent->cpu = smp_processor_id();
5722 + ent->set = arg->active_set;
5723 + ent->tgid = current->tgid;
5724 +
5725 + /*
5726 + * selectively store PMDs in increasing index number
5727 + */
5728 + if (npmds) {
5729 + u64 *val = arg->smpl_pmds_values;
5730 + for (i = 0; i < npmds; i++)
5731 + *e++ = *val++;
5732 + }
5733 +
5734 + /*
5735 + * update position for next entry
5736 + */
5737 + hdr->hdr_cur_offs += entry_size;
5738 + cur += entry_size;
5739 +
5740 + /*
5741 + * post check to avoid losing the last sample
5742 + */
5743 + if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE)
5744 + goto full;
5745 +
5746 + /*
5747 + * reset before returning from interrupt handler
5748 + */
5749 + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
5750 + return 0;
5751 +full:
5752 + PFM_DBG_ovfl("smpl buffer full free=%lu, count=%lu",
5753 + last-cur, hdr->hdr_count);
5754 +
5755 + /*
5756 + * increment number of buffer overflow.
5757 + * important to detect duplicate set of samples.
5758 + */
5759 + hdr->hdr_overflows++;
5760 +
5761 + /*
5762 + * request notification and masking of monitoring.
5763 + * Notification is still subject to the overflowed
5764 + */
5765 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
5766 +
5767 + return -ENOBUFS; /* we are full, sorry */
5768 +}
5769 +
5770 +static int pfm_default_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
5771 +{
5772 + struct pfm_default_smpl_hdr *hdr;
5773 +
5774 + hdr = buf;
5775 + /* rewind the buffer: no entries, write position back right after the header */
5776 + hdr->hdr_count = 0;
5777 + hdr->hdr_cur_offs = sizeof(*hdr);
5778 +
5779 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
5780 +
5781 + return 0;
5782 +}
5783 +
5784 +static int pfm_default_fmt_exit(void *buf)
5785 +{
5786 + return 0; /* nothing to tear down for this format */
5787 +}
5788 +
5789 +static struct pfm_smpl_fmt default_fmt = { /* descriptor passed to pfm_fmt_register()/pfm_fmt_unregister() */
5790 + .fmt_name = "default-old",
5791 + .fmt_version = 0x10000,
5792 + .fmt_arg_size = sizeof(struct pfm_default_smpl_arg),
5793 + .fmt_validate = pfm_default_fmt_validate,
5794 + .fmt_getsize = pfm_default_fmt_get_size,
5795 + .fmt_init = pfm_default_fmt_init,
5796 + .fmt_handler = pfm_default_fmt_handler,
5797 + .fmt_restart = pfm_default_fmt_restart,
5798 + .fmt_exit = pfm_default_fmt_exit,
5799 + .fmt_flags = FMT_FLAGS,
5800 + .owner = THIS_MODULE
5801 +};
5802 +
5803 +static int pfm_default_fmt_init_module(void)
5804 +{
5805 + /*
5806 + * module entry point: register the old default sampling format
5807 + */
5808 + return pfm_fmt_register(&default_fmt);
5809 +}
5810 +
5811 +static void pfm_default_fmt_cleanup_module(void)
5812 +{
5813 + pfm_fmt_unregister(&default_fmt); /* undo the registration done at module init */
5814 +}
5815 +
5816 +module_init(pfm_default_fmt_init_module);
5817 +module_exit(pfm_default_fmt_cleanup_module);
5818 diff --git a/arch/ia64/perfmon/perfmon_generic.c b/arch/ia64/perfmon/perfmon_generic.c
5819 new file mode 100644
5820 index 0000000..47b1870
5821 --- /dev/null
5822 +++ b/arch/ia64/perfmon/perfmon_generic.c
5823 @@ -0,0 +1,148 @@
5824 +/*
5825 + * This file contains the generic PMU register description tables
5826 + * and pmc checker used by perfmon.c.
5827 + *
5828 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
5829 + * contributed by Stephane Eranian <eranian@hpl.hp.com>
5830 + *
5831 + * This program is free software; you can redistribute it and/or
5832 + * modify it under the terms of version 2 of the GNU General Public
5833 + * License as published by the Free Software Foundation.
5834 + *
5835 + * This program is distributed in the hope that it will be useful,
5836 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5837 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5838 + * General Public License for more details.
5839 + *
5840 + * You should have received a copy of the GNU General Public License
5841 + * along with this program; if not, write to the Free Software
5842 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
5843 + * 02111-1307 USA
5844 + */
5845 +#include <linux/module.h>
5846 +#include <linux/perfmon_kern.h>
5847 +#include <asm/pal.h>
5848 +
5849 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
5850 +MODULE_DESCRIPTION("Generic IA-64 PMU description tables");
5851 +MODULE_LICENSE("GPL");
5852 +
5853 +#define RDEP(x) (1UL << (x)) /* single-bit mask for register index x */
5854 +
5855 +#define PFM_IA64GEN_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)) /* PMC4-PMC7 */
5856 +#define PFM_IA64GEN_RSVD (0xffffffffffff0080UL) /* passed as the reserved-bits argument of PMC_D below */
5857 +#define PFM_IA64GEN_NO64 (1UL<<5) /* bit 5, same bit as pmc.oi set in pfm_ia64gen_pmc_check() */
5858 +
5859 +/* forward declaration: pfm_ia64gen_probe_pmu() writes counter_width before the definition */
5860 +static struct pfm_pmu_config pfm_ia64gen_pmu_conf;
5861 +
5862 +static struct pfm_arch_pmu_info pfm_ia64gen_pmu_info = { /* referenced by pfm_ia64gen_pmu_conf.pmu_info */
5863 + .mask_pmcs = {PFM_IA64GEN_MASK_PMCS,},
5864 +};
5865 +
5866 +static struct pfm_regmap_desc pfm_ia64gen_pmc_desc[] = { /* one slot per PMC index; only PMC4-PMC7 are described */
5867 +/* pmc0 */ PMX_NA,
5868 +/* pmc1 */ PMX_NA,
5869 +/* pmc2 */ PMX_NA,
5870 +/* pmc3 */ PMX_NA,
5871 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 4),
5872 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 5),
5873 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 6),
5874 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 7)
5875 +};
5876 +#define PFM_IA64GEN_NUM_PMCS ARRAY_SIZE(pfm_ia64gen_pmc_desc) /* table length, including the PMX_NA slots */
5877 +
5878 +static struct pfm_regmap_desc pfm_ia64gen_pmd_desc[] = { /* one slot per PMD index; only PMD4-PMD7 are described */
5879 +/* pmd0 */ PMX_NA,
5880 +/* pmd1 */ PMX_NA,
5881 +/* pmd2 */ PMX_NA,
5882 +/* pmd3 */ PMX_NA,
5883 +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), /* NOTE(review): last argument presumably the mask of the controlling PMC -- confirm against PMD_DP */
5884 +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
5885 +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
5886 +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7)
5887 +};
5888 +#define PFM_IA64GEN_NUM_PMDS ARRAY_SIZE(pfm_ia64gen_pmd_desc) /* table length, including the PMX_NA slots */
5889 +
5890 +static int pfm_ia64gen_pmc_check(struct pfm_context *ctx,
5891 + struct pfm_event_set *set,
5892 + struct pfarg_pmc *req)
5893 +{
5894 +#define PFM_IA64GEN_PMC_PM_POS6 (1UL<<6)
5895 + u64 tmpval;
5896 + int is_system;
5897 + /* adjusts req->reg_value in place for PMC4-PMC7; always returns 0 */
5898 + is_system = ctx->flags.system;
5899 + tmpval = req->reg_value;
5900 +
5901 + switch (req->reg_num) {
5902 + case 4:
5903 + case 5:
5904 + case 6:
5905 + case 7:
5906 + /* set pmc.oi for 64-bit emulation */
5907 + tmpval |= 1UL << 5;
5908 + /* bit 6 is forced on for system-wide contexts and off otherwise */
5909 + if (is_system)
5910 + tmpval |= PFM_IA64GEN_PMC_PM_POS6;
5911 + else
5912 + tmpval &= ~PFM_IA64GEN_PMC_PM_POS6;
5913 + break;
5914 +
5915 + }
5916 + req->reg_value = tmpval; /* other registers are written back unchanged */
5917 +
5918 + return 0;
5919 +}
5920 +
5921 +/*
5922 + * matches any PMU for which the PAL_PERFMON_INFO call succeeds
5923 + */
5924 +static int pfm_ia64gen_probe_pmu(void)
5925 +{
5926 + u64 pm_buffer[16];
5927 + pal_perf_mon_info_u_t pm_info;
5928 +
5929 + /*
5930 + * call PAL_PERFMON_INFO to retrieve counter width which
5931 + * is implementation specific
5932 + */
5933 + if (ia64_pal_perf_mon_info(pm_buffer, &pm_info))
5934 + return -1; /* PAL call failed: report no match */
5935 +
5936 + pfm_ia64gen_pmu_conf.counter_width = pm_info.pal_perf_mon_info_s.width;
5937 +
5938 + return 0;
5939 +}
5940 +
5941 +/*
5942 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
5943 + */
5944 +static struct pfm_pmu_config pfm_ia64gen_pmu_conf = {
5945 + .pmu_name = "Generic IA-64",
5946 + .counter_width = 0, /* computed from PAL_PERFMON_INFO */
5947 + .pmd_desc = pfm_ia64gen_pmd_desc,
5948 + .pmc_desc = pfm_ia64gen_pmc_desc,
5949 + .probe_pmu = pfm_ia64gen_probe_pmu,
5950 + .num_pmc_entries = PFM_IA64GEN_NUM_PMCS,
5951 + .num_pmd_entries = PFM_IA64GEN_NUM_PMDS,
5952 + .pmc_write_check = pfm_ia64gen_pmc_check,
5953 + .version = "1.0",
5954 + .flags = PFM_PMU_BUILTIN_FLAG,
5955 + .owner = THIS_MODULE,
5956 + .pmu_info = &pfm_ia64gen_pmu_info
5957 + /* only a PMC write checker is installed; no other read/write checkers */
5958 +};
5959 +
5960 +static int __init pfm_gen_pmu_init_module(void)
5961 +{
5962 + return pfm_pmu_register(&pfm_ia64gen_pmu_conf); /* result is returned to the module loader as is */
5963 +}
5964 +
5965 +static void __exit pfm_gen_pmu_cleanup_module(void)
5966 +{
5967 + pfm_pmu_unregister(&pfm_ia64gen_pmu_conf); /* undo the registration done at module init */
5968 +}
5969 +
5970 +module_init(pfm_gen_pmu_init_module);
5971 +module_exit(pfm_gen_pmu_cleanup_module);
5972 diff --git a/arch/ia64/perfmon/perfmon_itanium.c b/arch/ia64/perfmon/perfmon_itanium.c
5973 new file mode 100644
5974 index 0000000..094b31b
5975 --- /dev/null
5976 +++ b/arch/ia64/perfmon/perfmon_itanium.c
5977 @@ -0,0 +1,232 @@
5978 +/*
5979 + * This file contains the Itanium PMU register description tables
5980 + * and pmc checker used by perfmon.c.
5981 + *
5982 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
5983 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
5984 + *
5985 + * This program is free software; you can redistribute it and/or
5986 + * modify it under the terms of version 2 of the GNU General Public
5987 + * License as published by the Free Software Foundation.
5988 + *
5989 + * This program is distributed in the hope that it will be useful,
5990 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
5991 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5992 + * General Public License for more details.
5993 + *
5994 + * You should have received a copy of the GNU General Public License
5995 + * along with this program; if not, write to the Free Software
5996 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
5997 + * 02111-1307 USA
5998 + */
5999 +#include <linux/module.h>
6000 +#include <linux/perfmon_kern.h>
6001 +
6002 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
6003 +MODULE_DESCRIPTION("Itanium (Merced) PMU description tables");
6004 +MODULE_LICENSE("GPL");
6005 +
6006 +#define RDEP(x) (1ULL << (x)) /* single-bit mask for register index x */
6007 +
6008 +#define PFM_ITA_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\
6009 + RDEP(12)) /* PMC4-7 and PMC10-12 */
6010 +
6011 +#define PFM_ITA_NO64 (1ULL<<5) /* bit 5; used as the fifth PMC_D argument for PMC4-PMC7 */
6012 +
6013 +static struct pfm_arch_pmu_info pfm_ita_pmu_info = { /* referenced by pfm_ita_pmu_conf.pmu_info */
6014 + .mask_pmcs = {PFM_ITA_MASK_PMCS,},
6015 +};
6016 +/* reserved bits are 1 in the mask (used for PMC4-PMC7 below) */
6017 +#define PFM_ITA_RSVD 0xfffffffffc8000a0UL
6018 +/*
6019 + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using
6020 + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
6021 + * but this is fine because they are handled separately in the IA-64 specific
6022 + * code. In this table IBR0-7 sit at PMC256-263 and DBR0-7 at PMC264-271.
6023 + */
6024 +static struct pfm_regmap_desc pfm_ita_pmc_desc[] = {
6025 +/* pmc0 */ PMX_NA,
6026 +/* pmc1 */ PMX_NA,
6027 +/* pmc2 */ PMX_NA,
6028 +/* pmc3 */ PMX_NA,
6029 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 4),
6030 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 5),
6031 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 6),
6032 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 7),
6033 +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 8),
6034 +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 9),
6035 +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xfffffffff3f0ff30UL, 0, 10),
6036 +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x10000000UL, 0xffffffffecf0ff30UL, 0, 11),
6037 +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0030UL, 0, 12),
6038 +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x3ffff00000001UL, 0xfffffffffffffffeUL, 0, 13),
6039 +/* pmc14 */ PMX_NA,
6040 +/* pmc15 */ PMX_NA,
6041 +/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6042 +/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6043 +/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6044 +/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6045 +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6046 +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6047 +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6048 +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6049 +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6050 +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6051 +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6052 +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6053 +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6054 +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6055 +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6056 +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6057 +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6058 +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6059 +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6060 +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6061 +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6062 +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6063 +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6064 +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6065 +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6066 +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6067 +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6068 +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6069 +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6070 +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6071 +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0),
6072 +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
6073 +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2),
6074 +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
6075 +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4),
6076 +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
6077 +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6),
6078 +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
6079 +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0),
6080 +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
6081 +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2),
6082 +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
6083 +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4),
6084 +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
6085 +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6),
6086 +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
6087 +};
6088 +#define PFM_ITA_NUM_PMCS ARRAY_SIZE(pfm_ita_pmc_desc) /* table length, including the PMX_NA slots */
6089 +
6090 +static struct pfm_regmap_desc pfm_ita_pmd_desc[] = { /* NOTE(review): last PMD_DP argument presumably the mask of the controlling PMC -- confirm */
6091 +/* pmd0 */ PMD_DP(PFM_REG_I , "PMD0", 0, 1ull << 10),
6092 +/* pmd1 */ PMD_DP(PFM_REG_I , "PMD1", 1, 1ull << 10),
6093 +/* pmd2 */ PMD_DP(PFM_REG_I , "PMD2", 2, 1ull << 11),
6094 +/* pmd3 */ PMD_DP(PFM_REG_I , "PMD3", 3, 1ull << 11),
6095 +/* pmd4 */ PMD_DP(PFM_REG_C , "PMD4", 4, 1ull << 4),
6096 +/* pmd5 */ PMD_DP(PFM_REG_C , "PMD5", 5, 1ull << 5),
6097 +/* pmd6 */ PMD_DP(PFM_REG_C , "PMD6", 6, 1ull << 6),
6098 +/* pmd7 */ PMD_DP(PFM_REG_C , "PMD7", 7, 1ull << 7),
6099 +/* pmd8 */ PMD_DP(PFM_REG_I , "PMD8", 8, 1ull << 12),
6100 +/* pmd9 */ PMD_DP(PFM_REG_I , "PMD9", 9, 1ull << 12),
6101 +/* pmd10 */ PMD_DP(PFM_REG_I , "PMD10", 10, 1ull << 12),
6102 +/* pmd11 */ PMD_DP(PFM_REG_I , "PMD11", 11, 1ull << 12),
6103 +/* pmd12 */ PMD_DP(PFM_REG_I , "PMD12", 12, 1ull << 12),
6104 +/* pmd13 */ PMD_DP(PFM_REG_I , "PMD13", 13, 1ull << 12),
6105 +/* pmd14 */ PMD_DP(PFM_REG_I , "PMD14", 14, 1ull << 12),
6106 +/* pmd15 */ PMD_DP(PFM_REG_I , "PMD15", 15, 1ull << 12),
6107 +/* pmd16 */ PMD_DP(PFM_REG_I , "PMD16", 16, 1ull << 12),
6108 +/* pmd17 */ PMD_DP(PFM_REG_I , "PMD17", 17, 1ull << 11)
6109 +};
6110 +#define PFM_ITA_NUM_PMDS ARRAY_SIZE(pfm_ita_pmd_desc) /* number of PMD slots in the table */
6111 +
6112 +static int pfm_ita_pmc_check(struct pfm_context *ctx,
6113 + struct pfm_event_set *set,
6114 + struct pfarg_pmc *req)
6115 +{
6116 +#define PFM_ITA_PMC_PM_POS6 (1UL<<6)
6117 + struct pfm_arch_context *ctx_arch;
6118 + u64 tmpval;
6119 + u16 cnum;
6120 + int ret = 0, is_system;
6121 + /* adjusts req->reg_value in place; may fail when debug registers must be claimed */
6122 + tmpval = req->reg_value;
6123 + cnum = req->reg_num;
6124 + ctx_arch = pfm_ctx_arch(ctx);
6125 + is_system = ctx->flags.system;
6126 +
6127 + switch (cnum) { /* same register set as PFM_ITA_MASK_PMCS */
6128 + case 4:
6129 + case 5:
6130 + case 6:
6131 + case 7:
6132 + case 10:
6133 + case 11:
6134 + case 12:
6135 + if (is_system) /* bit 6 forced on for system-wide contexts, off otherwise */
6136 + tmpval |= PFM_ITA_PMC_PM_POS6;
6137 + else
6138 + tmpval &= ~PFM_ITA_PMC_PM_POS6;
6139 + break;
6140 + }
6141 +
6142 + /*
6143 + * we must clear the (instruction) debug registers if pmc13.ta bit is
6144 + * cleared before they are written (flags.use_dbr == 0) to avoid
6145 + * picking up stale information.
6146 + */
6147 + if (cnum == 13 && ((tmpval & 0x1) == 0)
6148 + && ctx_arch->flags.use_dbr == 0) {
6149 + PFM_DBG("pmc13 has pmc13.ta cleared, clearing ibr");
6150 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6151 + if (ret)
6152 + return ret; /* req->reg_value is left untouched on failure */
6153 + }
6154 +
6155 + /*
6156 + * we must clear the (data) debug registers if pmc11.pt bit is cleared
6157 + * before they are written (flags.use_dbr == 0) to avoid picking up
6158 + * stale information.
6159 + */
6160 + if (cnum == 11 && ((tmpval >> 28) & 0x1) == 0
6161 + && ctx_arch->flags.use_dbr == 0) {
6162 + PFM_DBG("pmc11 has pmc11.pt cleared, clearing dbr");
6163 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6164 + if (ret)
6165 + return ret; /* req->reg_value is left untouched on failure */
6166 + }
6167 +
6168 + req->reg_value = tmpval;
6169 +
6170 + return 0;
6171 +}
6172 +
6173 +static int pfm_ita_probe_pmu(void)
6174 +{
6175 + return local_cpu_data->family == 0x7 && !ia64_platform_is("hpsim") /* match: CPU family 0x7, not on hpsim */
6176 + ? 0 : -1;
6177 +}
6178 +
6179 +/*
6180 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
6181 + */
6182 +static struct pfm_pmu_config pfm_ita_pmu_conf = {
6183 + .pmu_name = "Itanium",
6184 + .counter_width = 32, /* fixed here, unlike the generic PMU which asks PAL */
6185 + .pmd_desc = pfm_ita_pmd_desc,
6186 + .pmc_desc = pfm_ita_pmc_desc,
6187 + .pmc_write_check = pfm_ita_pmc_check,
6188 + .num_pmc_entries = PFM_ITA_NUM_PMCS,
6189 + .num_pmd_entries = PFM_ITA_NUM_PMDS,
6190 + .probe_pmu = pfm_ita_probe_pmu,
6191 + .version = "1.0",
6192 + .flags = PFM_PMU_BUILTIN_FLAG,
6193 + .owner = THIS_MODULE,
6194 + .pmu_info = &pfm_ita_pmu_info
6195 +};
6196 +/* Module init: hand the Itanium PMU description to pfm_pmu_register() */
6197 +static int __init pfm_ita_pmu_init_module(void)
6198 +{
6199 + return pfm_pmu_register(&pfm_ita_pmu_conf);
6200 +}
6201 +/* Module exit: withdraw the Itanium description via pfm_pmu_unregister() */
6202 +static void __exit pfm_ita_pmu_cleanup_module(void)
6203 +{
6204 + pfm_pmu_unregister(&pfm_ita_pmu_conf);
6205 +}
6206 +/* Run the register/unregister helpers at module load and unload */
6207 +module_init(pfm_ita_pmu_init_module);
6208 +module_exit(pfm_ita_pmu_cleanup_module);
6209 +
6210 diff --git a/arch/ia64/perfmon/perfmon_mckinley.c b/arch/ia64/perfmon/perfmon_mckinley.c
6211 new file mode 100644
6212 index 0000000..dc59092
6213 --- /dev/null
6214 +++ b/arch/ia64/perfmon/perfmon_mckinley.c
6215 @@ -0,0 +1,290 @@
6216 +/*
6217 + * This file contains the McKinley PMU register description tables
6218 + * and pmc checker used by perfmon.c.
6219 + *
6220 + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
6221 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
6222 + *
6223 + * This program is free software; you can redistribute it and/or
6224 + * modify it under the terms of version 2 of the GNU General Public
6225 + * License as published by the Free Software Foundation.
6226 + *
6227 + * This program is distributed in the hope that it will be useful,
6228 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6229 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6230 + * General Public License for more details.
6231 + *
6232 + * You should have received a copy of the GNU General Public License
6233 + * along with this program; if not, write to the Free Software
6234 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
6235 + * 02111-1307 USA
6236 + */
6237 +#include <linux/module.h>
6238 +#include <linux/perfmon_kern.h>
6239 +
6240 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
6241 +MODULE_DESCRIPTION("Itanium 2 (McKinley) PMU description tables");
6242 +MODULE_LICENSE("GPL");
6243 +
6244 +#define RDEP(x) (1UL << (x))
6245 +
6246 +#define PFM_MCK_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\
6247 + RDEP(12))
6248 +
6249 +#define PFM_MCK_NO64 (1UL<<5)
6250 +/* mask_pmcs covers PMC4-PMC7 and PMC10-PMC12, see PFM_MCK_MASK_PMCS */
6251 +static struct pfm_arch_pmu_info pfm_mck_pmu_info = {
6252 + .mask_pmcs = {PFM_MCK_MASK_PMCS,},
6253 +};
6254 +
6255 +/* reserved bits are 1 in the mask */
6256 +#define PFM_ITA2_RSVD 0xfffffffffc8000a0UL
6257 +
6258 +/*
6259 + * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
6260 + * PMC256+y means we use PMC256+y+1. There is no dependency information for
6261 + * them here, which is fine because they are handled separately in the IA-64
6262 + * specific code. PMC4-PMC7 share the PFM_ITA2_RSVD reserved-bit mask.
6263 + */
6264 +static struct pfm_regmap_desc pfm_mck_pmc_desc[] = {
6265 +/* pmc0 */ PMX_NA,
6266 +/* pmc1 */ PMX_NA,
6267 +/* pmc2 */ PMX_NA,
6268 +/* pmc3 */ PMX_NA,
6269 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x800020UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 4),
6270 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 5),
6271 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 6),
6272 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 7),
6273 +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xffffffff3fffffffUL, 0xc0000004UL, 0, 8),
6274 +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xffffffff3ffffffcUL, 0xc0000004UL, 0, 9),
6275 +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xffffffffffff0000UL, 0, 10),
6276 +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x0, 0xfffffffffcf0fe30UL, 0, 11),
6277 +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0000UL, 0, 12),
6278 +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x2078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 13),
6279 +/* pmc14 */ PMC_D(PFM_REG_W , "PMC14", 0x0db60db60db60db6UL, 0xffffffffffffdb6dUL, 0, 14),
6280 +/* pmc15 */ PMC_D(PFM_REG_W , "PMC15", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 15),
6281 +/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6282 +/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6283 +/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6284 +/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6285 +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6286 +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6287 +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6288 +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6289 +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6290 +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6291 +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6292 +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6293 +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6294 +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6295 +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6296 +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6297 +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6298 +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6299 +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6300 +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6301 +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6302 +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6303 +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6304 +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6305 +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6306 +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6307 +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6308 +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6309 +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6310 +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6311 +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0),
6312 +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
6313 +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2),
6314 +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
6315 +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4),
6316 +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
6317 +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6),
6318 +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
6319 +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0),
6320 +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
6321 +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2),
6322 +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
6323 +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4),
6324 +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
6325 +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6),
6326 +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
6327 +};
6328 +#define PFM_MCK_NUM_PMCS ARRAY_SIZE(pfm_mck_pmc_desc)
6329 +/* PMD descriptors, one entry per PMD number (pmd0 through pmd17) */
6330 +static struct pfm_regmap_desc pfm_mck_pmd_desc[] = {
6331 +/* pmd0 */ PMD_DP(PFM_REG_I, "PMD0", 0, 1ull << 10),
6332 +/* pmd1 */ PMD_DP(PFM_REG_I, "PMD1", 1, 1ull << 10),
6333 +/* pmd2 */ PMD_DP(PFM_REG_I, "PMD2", 2, 1ull << 11),
6334 +/* pmd3 */ PMD_DP(PFM_REG_I, "PMD3", 3, 1ull << 11),
6335 +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
6336 +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
6337 +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
6338 +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7),
6339 +/* pmd8 */ PMD_DP(PFM_REG_I, "PMD8", 8, 1ull << 12),
6340 +/* pmd9 */ PMD_DP(PFM_REG_I, "PMD9", 9, 1ull << 12),
6341 +/* pmd10 */ PMD_DP(PFM_REG_I, "PMD10", 10, 1ull << 12),
6342 +/* pmd11 */ PMD_DP(PFM_REG_I, "PMD11", 11, 1ull << 12),
6343 +/* pmd12 */ PMD_DP(PFM_REG_I, "PMD12", 12, 1ull << 12),
6344 +/* pmd13 */ PMD_DP(PFM_REG_I, "PMD13", 13, 1ull << 12),
6345 +/* pmd14 */ PMD_DP(PFM_REG_I, "PMD14", 14, 1ull << 12),
6346 +/* pmd15 */ PMD_DP(PFM_REG_I, "PMD15", 15, 1ull << 12),
6347 +/* pmd16 */ PMD_DP(PFM_REG_I, "PMD16", 16, 1ull << 12),
6348 +/* pmd17 */ PMD_DP(PFM_REG_I, "PMD17", 17, 1ull << 11)
6349 +};
6350 +#define PFM_MCK_NUM_PMDS ARRAY_SIZE(pfm_mck_pmd_desc)
6351 +/* .pmc_write_check hook: validates req and may rewrite req->reg_value */
6352 +static int pfm_mck_pmc_check(struct pfm_context *ctx,
6353 + struct pfm_event_set *set,
6354 + struct pfarg_pmc *req)
6355 +{
6356 + struct pfm_arch_context *ctx_arch;
6357 + u64 val8 = 0, val14 = 0, val13 = 0;
6358 + u64 tmpval;
6359 + u16 cnum;
6360 + int ret = 0, check_case1 = 0;
6361 + int is_system;
6362 +
6363 + tmpval = req->reg_value;
6364 + cnum = req->reg_num;
6365 + ctx_arch = pfm_ctx_arch(ctx);
6366 + is_system = ctx->flags.system;
6367 +
6368 +#define PFM_MCK_PMC_PM_POS6 (1UL<<6)
6369 +#define PFM_MCK_PMC_PM_POS4 (1UL<<4)
6370 +
6371 + switch (cnum) {
6372 + case 4:
6373 + case 5:
6374 + case 6:
6375 + case 7:
6376 + case 11:
6377 + case 12:
6378 + if (is_system)
6379 + tmpval |= PFM_MCK_PMC_PM_POS6;
6380 + else
6381 + tmpval &= ~PFM_MCK_PMC_PM_POS6;
6382 + break;
6383 +
6384 + case 8:
6385 + val8 = tmpval;
6386 + val13 = set->pmcs[13];
6387 + val14 = set->pmcs[14];
6388 + check_case1 = 1;
6389 + break;
6390 +
6391 + case 10:
6392 + if (is_system)
6393 + tmpval |= PFM_MCK_PMC_PM_POS4;
6394 + else
6395 + tmpval &= ~PFM_MCK_PMC_PM_POS4;
6396 + break;
6397 +
6398 + case 13:
6399 + val8 = set->pmcs[8];
6400 + val13 = tmpval;
6401 + val14 = set->pmcs[14];
6402 + check_case1 = 1;
6403 + break;
6404 +
6405 + case 14:
6406 + val8 = set->pmcs[8];
6407 + val13 = set->pmcs[13];
6408 + val14 = tmpval;
6409 + check_case1 = 1;
6410 + break;
6411 + }
6412 +
6413 + /*
6414 + * check illegal configuration which can produce inconsistencies
6415 + * in tagging i-side events in L1D and L2 caches
6416 + */
6417 + if (check_case1) {
6418 + ret = (((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0))
6419 + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
6420 + || (((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
6421 +
6422 + if (ret) {
6423 + PFM_DBG("perfmon: invalid config pmc8=0x%lx "
6424 + "pmc13=0x%lx pmc14=0x%lx",
6425 + val8, val13, val14);
6426 + return -EINVAL;
6427 + }
6428 + }
6429 +
6430 + /*
6431 + * check if configuration implicitly activates the use of
6432 + * the debug registers. If true, then we ensure that this is
6433 + * possible and that we do not pick up stale value in the HW
6434 + * registers.
6435 + *
6436 + * We postpone the checks of pmc13 and pmc14 to avoid side effects
6437 + * in case of errors
6438 + */
6439 +
6440 + /*
6441 + * pmc13 is "active" if:
6442 + * one of the pmc13.cfg_dbrpXX field is different from 0x3
6443 + * AND
6444 + * the corresponding pmc13.ena_dbrpXX is set.
6445 + */
6446 + if (cnum == 13 && (tmpval & 0x1e00000000000UL)
6447 + && (tmpval & 0x18181818UL) != 0x18181818UL
6448 + && ctx_arch->flags.use_dbr == 0) {
6449 + PFM_DBG("pmc13=0x%lx active", tmpval);
6450 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6451 + if (ret)
6452 + return ret;
6453 + }
6454 +
6455 + /*
6456 + * if any pmc14.ibrpX bit is enabled we must clear the ibrs
6457 + */
6458 + if (cnum == 14 && ((tmpval & 0x2222UL) != 0x2222UL)
6459 + && ctx_arch->flags.use_dbr == 0) {
6460 + PFM_DBG("pmc14=0x%lx active", tmpval);
6461 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6462 + if (ret)
6463 + return ret;
6464 + }
6465 +
6466 + req->reg_value = tmpval;
6467 +
6468 + return 0;
6469 +}
6470 +/* Probe: 0 when the local CPU reports family 0x1f, otherwise -1 */
6471 +static int pfm_mck_probe_pmu(void)
6472 +{
6473 + return (local_cpu_data->family != 0x1f) ? -1 : 0;
6474 +}
6475 +
6476 +/*
6477 + * impl_pmcs/impl_pmds are left out: computed at runtime to minimize errors.
6478 + */
6479 +static struct pfm_pmu_config pfm_mck_pmu_conf = {
6480 + .owner = THIS_MODULE,
6481 + .pmu_name = "Itanium 2",
6482 + .version = "1.0",
6483 + .flags = PFM_PMU_BUILTIN_FLAG,
6484 + .counter_width = 47,
6485 + .pmc_desc = pfm_mck_pmc_desc,
6486 + .num_pmc_entries = PFM_MCK_NUM_PMCS,
6487 + .pmd_desc = pfm_mck_pmd_desc,
6488 + .num_pmd_entries = PFM_MCK_NUM_PMDS,
6489 + .pmu_info = &pfm_mck_pmu_info,
6490 + .probe_pmu = pfm_mck_probe_pmu,
6491 + .pmc_write_check = pfm_mck_pmc_check,
6492 +};
6493 +/* Module init: hand the Itanium 2 PMU description to pfm_pmu_register() */
6494 +static int __init pfm_mck_pmu_init_module(void)
6495 +{
6496 + return pfm_pmu_register(&pfm_mck_pmu_conf);
6497 +}
6498 +/* Module exit: withdraw the Itanium 2 description via pfm_pmu_unregister() */
6499 +static void __exit pfm_mck_pmu_cleanup_module(void)
6500 +{
6501 + pfm_pmu_unregister(&pfm_mck_pmu_conf);
6502 +}
6503 +/* Run the register/unregister helpers at module load and unload */
6504 +module_init(pfm_mck_pmu_init_module);
6505 +module_exit(pfm_mck_pmu_cleanup_module);
6506 diff --git a/arch/ia64/perfmon/perfmon_montecito.c b/arch/ia64/perfmon/perfmon_montecito.c
6507 new file mode 100644
6508 index 0000000..3f76f73
6509 --- /dev/null
6510 +++ b/arch/ia64/perfmon/perfmon_montecito.c
6511 @@ -0,0 +1,412 @@
6512 +/*
6513 + * This file contains the Montecito PMU register description tables
6514 + * and pmc checker used by perfmon.c.
6515 + *
6516 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
6517 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
6518 + *
6519 + * This program is free software; you can redistribute it and/or
6520 + * modify it under the terms of version 2 of the GNU General Public
6521 + * License as published by the Free Software Foundation.
6522 + *
6523 + * This program is distributed in the hope that it will be useful,
6524 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6525 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
6526 + * General Public License for more details.
6527 + *
6528 + * You should have received a copy of the GNU General Public License
6529 + * along with this program; if not, write to the Free Software
6530 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
6531 + * 02111-1307 USA
6532 + */
6533 +#include <linux/module.h>
6534 +#include <linux/smp.h>
6535 +#include <linux/perfmon_kern.h>
6536 +
6537 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
6538 +MODULE_DESCRIPTION("Dual-Core Itanium 2 (Montecito) PMU description table");
6539 +MODULE_LICENSE("GPL");
6540 +
6541 +#define RDEP(x) (1UL << (x))
6542 +
6543 +#define PFM_MONT_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|\
6544 + RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|\
6545 + RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|\
6546 + RDEP(37)|RDEP(39)|RDEP(40)|RDEP(42))
6547 +
6548 +#define PFM_MONT_NO64 (1UL<<5)
6549 +/* mask_pmcs covers PMC4-PMC15, PMC37, PMC39, PMC40 and PMC42 */
6550 +static struct pfm_arch_pmu_info pfm_mont_pmu_info = {
6551 + .mask_pmcs = {PFM_MONT_MASK_PMCS,},
6552 +};
6553 +
6554 +#define PFM_MONT_RSVD 0xffffffff838000a0UL
6555 +/*
6556 + * PMC descriptors, one entry per PMC number (pmc0 through pmc271).
6557 + * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
6558 + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
6559 + * but this is fine because they are handled separately in the IA-64 specific
6560 + * code.
6561 + *
6562 + * For PMC4-PMC15, PMC40: we force pmc.ism=2 (IA-64 mode only)
6563 + */
6564 +static struct pfm_regmap_desc pfm_mont_pmc_desc[] = {
6565 +/* pmc0 */ PMX_NA,
6566 +/* pmc1 */ PMX_NA,
6567 +/* pmc2 */ PMX_NA,
6568 +/* pmc3 */ PMX_NA,
6569 +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 4),
6570 +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 5),
6571 +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 6),
6572 +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 7),
6573 +/* pmc8 */ PMC_D(PFM_REG_W64, "PMC8" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 8),
6574 +/* pmc9 */ PMC_D(PFM_REG_W64, "PMC9" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 9),
6575 +/* pmc10 */ PMC_D(PFM_REG_W64, "PMC10", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 10),
6576 +/* pmc11 */ PMC_D(PFM_REG_W64, "PMC11", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 11),
6577 +/* pmc12 */ PMC_D(PFM_REG_W64, "PMC12", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 12),
6578 +/* pmc13 */ PMC_D(PFM_REG_W64, "PMC13", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 13),
6579 +/* pmc14 */ PMC_D(PFM_REG_W64, "PMC14", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 14),
6580 +/* pmc15 */ PMC_D(PFM_REG_W64, "PMC15", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 15),
6581 +/* pmc16 */ PMX_NA,
6582 +/* pmc17 */ PMX_NA,
6583 +/* pmc18 */ PMX_NA,
6584 +/* pmc19 */ PMX_NA,
6585 +/* pmc20 */ PMX_NA,
6586 +/* pmc21 */ PMX_NA,
6587 +/* pmc22 */ PMX_NA,
6588 +/* pmc23 */ PMX_NA,
6589 +/* pmc24 */ PMX_NA,
6590 +/* pmc25 */ PMX_NA,
6591 +/* pmc26 */ PMX_NA,
6592 +/* pmc27 */ PMX_NA,
6593 +/* pmc28 */ PMX_NA,
6594 +/* pmc29 */ PMX_NA,
6595 +/* pmc30 */ PMX_NA,
6596 +/* pmc31 */ PMX_NA,
6597 +/* pmc32 */ PMC_D(PFM_REG_W , "PMC32", 0x30f01ffffffffffUL, 0xfcf0fe0000000000UL, 0, 32),
6598 +/* pmc33 */ PMC_D(PFM_REG_W , "PMC33", 0x0, 0xfffffe0000000000UL, 0, 33),
6599 +/* pmc34 */ PMC_D(PFM_REG_W , "PMC34", 0xf01ffffffffffUL, 0xfff0fe0000000000UL, 0, 34),
6600 +/* pmc35 */ PMC_D(PFM_REG_W , "PMC35", 0x0, 0x1ffffffffffUL, 0, 35),
6601 +/* pmc36 */ PMC_D(PFM_REG_W , "PMC36", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 36),
6602 +/* pmc37 */ PMC_D(PFM_REG_W , "PMC37", 0x0, 0xffffffffffffc000UL, 0, 37),
6603 +/* pmc38 */ PMC_D(PFM_REG_W , "PMC38", 0xdb6UL, 0xffffffffffffdb6dUL, 0, 38),
6604 +/* pmc39 */ PMC_D(PFM_REG_W , "PMC39", 0x0, 0xffffffffffff0030UL, 0, 39),
6605 +/* pmc40 */ PMC_D(PFM_REG_W , "PMC40", 0x2000000UL, 0xfffffffffff0fe30UL, 0, 40),
6606 +/* pmc41 */ PMC_D(PFM_REG_W , "PMC41", 0x00002078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 41),
6607 +/* pmc42 */ PMC_D(PFM_REG_W , "PMC42", 0x0, 0xfff800b0UL, 0, 42),
6608 +/* pmc43 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6609 +/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6610 +/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6611 +/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6612 +/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6613 +/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6614 +/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6615 +/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6616 +/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6617 +/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6618 +/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6619 +/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6620 +/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6621 +/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6622 +/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6623 +/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6624 +/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6625 +/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6626 +/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6627 +/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6628 +/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6629 +/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6630 +/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6631 +/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6632 +/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6633 +/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6634 +/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
6635 +/* pmc256 */ PMC_D(PFM_REG_W, "IBR0", 0x0, 0, 0, 0),
6636 +/* pmc257 */ PMC_D(PFM_REG_W, "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
6637 +/* pmc258 */ PMC_D(PFM_REG_W, "IBR2", 0x0, 0, 0, 2),
6638 +/* pmc259 */ PMC_D(PFM_REG_W, "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
6639 +/* pmc260 */ PMC_D(PFM_REG_W, "IBR4", 0x0, 0, 0, 4),
6640 +/* pmc261 */ PMC_D(PFM_REG_W, "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
6641 +/* pmc262 */ PMC_D(PFM_REG_W, "IBR6", 0x0, 0, 0, 6),
6642 +/* pmc263 */ PMC_D(PFM_REG_W, "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
6643 +/* pmc264 */ PMC_D(PFM_REG_W, "DBR0", 0x0, 0, 0, 0),
6644 +/* pmc265 */ PMC_D(PFM_REG_W, "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
6645 +/* pmc266 */ PMC_D(PFM_REG_W, "DBR2", 0x0, 0, 0, 2),
6646 +/* pmc267 */ PMC_D(PFM_REG_W, "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
6647 +/* pmc268 */ PMC_D(PFM_REG_W, "DBR4", 0x0, 0, 0, 4),
6648 +/* pmc269 */ PMC_D(PFM_REG_W, "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
6649 +/* pmc270 */ PMC_D(PFM_REG_W, "DBR6", 0x0, 0, 0, 6),
6650 +/* pmc271 */ PMC_D(PFM_REG_W, "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
6651 +};
6652 +#define PFM_MONT_NUM_PMCS ARRAY_SIZE(pfm_mont_pmc_desc)
6653 +/* PMD descriptors, one entry per PMD number (pmd0 through pmd63) */
6654 +static struct pfm_regmap_desc pfm_mont_pmd_desc[] = {
6655 +/* pmd0 */ PMX_NA,
6656 +/* pmd1 */ PMX_NA,
6657 +/* pmd2 */ PMX_NA,
6658 +/* pmd3 */ PMX_NA,
6659 +/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
6660 +/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
6661 +/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
6662 +/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7),
6663 +/* pmd8 */ PMD_DP(PFM_REG_C, "PMD8", 8, 1ull << 8),
6664 +/* pmd9 */ PMD_DP(PFM_REG_C, "PMD9", 9, 1ull << 9),
6665 +/* pmd10 */ PMD_DP(PFM_REG_C, "PMD10", 10, 1ull << 10),
6666 +/* pmd11 */ PMD_DP(PFM_REG_C, "PMD11", 11, 1ull << 11),
6667 +/* pmd12 */ PMD_DP(PFM_REG_C, "PMD12", 12, 1ull << 12),
6668 +/* pmd13 */ PMD_DP(PFM_REG_C, "PMD13", 13, 1ull << 13),
6669 +/* pmd14 */ PMD_DP(PFM_REG_C, "PMD14", 14, 1ull << 14),
6670 +/* pmd15 */ PMD_DP(PFM_REG_C, "PMD15", 15, 1ull << 15),
6671 +/* pmd16 */ PMX_NA,
6672 +/* pmd17 */ PMX_NA,
6673 +/* pmd18 */ PMX_NA,
6674 +/* pmd19 */ PMX_NA,
6675 +/* pmd20 */ PMX_NA,
6676 +/* pmd21 */ PMX_NA,
6677 +/* pmd22 */ PMX_NA,
6678 +/* pmd23 */ PMX_NA,
6679 +/* pmd24 */ PMX_NA,
6680 +/* pmd25 */ PMX_NA,
6681 +/* pmd26 */ PMX_NA,
6682 +/* pmd27 */ PMX_NA,
6683 +/* pmd28 */ PMX_NA,
6684 +/* pmd29 */ PMX_NA,
6685 +/* pmd30 */ PMX_NA,
6686 +/* pmd31 */ PMX_NA,
6687 +/* pmd32 */ PMD_DP(PFM_REG_I, "PMD32", 32, 1ull << 40),
6688 +/* pmd33 */ PMD_DP(PFM_REG_I, "PMD33", 33, 1ull << 40),
6689 +/* pmd34 */ PMD_DP(PFM_REG_I, "PMD34", 34, 1ull << 37),
6690 +/* pmd35 */ PMD_DP(PFM_REG_I, "PMD35", 35, 1ull << 37),
6691 +/* pmd36 */ PMD_DP(PFM_REG_I, "PMD36", 36, 1ull << 40),
6692 +/* pmd37 */ PMX_NA,
6693 +/* pmd38 */ PMD_DP(PFM_REG_I, "PMD38", 38, (1ull<<39)|(1ull<<42)),
6694 +/* pmd39 */ PMD_DP(PFM_REG_I, "PMD39", 39, (1ull<<39)|(1ull<<42)),
6695 +/* pmd40 */ PMX_NA,
6696 +/* pmd41 */ PMX_NA,
6697 +/* pmd42 */ PMX_NA,
6698 +/* pmd43 */ PMX_NA,
6699 +/* pmd44 */ PMX_NA,
6700 +/* pmd45 */ PMX_NA,
6701 +/* pmd46 */ PMX_NA,
6702 +/* pmd47 */ PMX_NA,
6703 +/* pmd48 */ PMD_DP(PFM_REG_I, "PMD48", 48, (1ull<<39)|(1ull<<42)),
6704 +/* pmd49 */ PMD_DP(PFM_REG_I, "PMD49", 49, (1ull<<39)|(1ull<<42)),
6705 +/* pmd50 */ PMD_DP(PFM_REG_I, "PMD50", 50, (1ull<<39)|(1ull<<42)),
6706 +/* pmd51 */ PMD_DP(PFM_REG_I, "PMD51", 51, (1ull<<39)|(1ull<<42)),
6707 +/* pmd52 */ PMD_DP(PFM_REG_I, "PMD52", 52, (1ull<<39)|(1ull<<42)),
6708 +/* pmd53 */ PMD_DP(PFM_REG_I, "PMD53", 53, (1ull<<39)|(1ull<<42)),
6709 +/* pmd54 */ PMD_DP(PFM_REG_I, "PMD54", 54, (1ull<<39)|(1ull<<42)),
6710 +/* pmd55 */ PMD_DP(PFM_REG_I, "PMD55", 55, (1ull<<39)|(1ull<<42)),
6711 +/* pmd56 */ PMD_DP(PFM_REG_I, "PMD56", 56, (1ull<<39)|(1ull<<42)),
6712 +/* pmd57 */ PMD_DP(PFM_REG_I, "PMD57", 57, (1ull<<39)|(1ull<<42)),
6713 +/* pmd58 */ PMD_DP(PFM_REG_I, "PMD58", 58, (1ull<<39)|(1ull<<42)),
6714 +/* pmd59 */ PMD_DP(PFM_REG_I, "PMD59", 59, (1ull<<39)|(1ull<<42)),
6715 +/* pmd60 */ PMD_DP(PFM_REG_I, "PMD60", 60, (1ull<<39)|(1ull<<42)),
6716 +/* pmd61 */ PMD_DP(PFM_REG_I, "PMD61", 61, (1ull<<39)|(1ull<<42)),
6717 +/* pmd62 */ PMD_DP(PFM_REG_I, "PMD62", 62, (1ull<<39)|(1ull<<42)),
6718 +/* pmd63 */ PMD_DP(PFM_REG_I, "PMD63", 63, (1ull<<39)|(1ull<<42))
6719 +};
6720 +#define PFM_MONT_NUM_PMDS ARRAY_SIZE(pfm_mont_pmd_desc)
6721 +/* set to 1 by pfm_handle_errata(); gates the errata 120/121 checks */
6722 +static int pfm_mont_has_ht;
6723 +/* .pmc_write_check hook: validates req and may rewrite req->reg_value */
6724 +static int pfm_mont_pmc_check(struct pfm_context *ctx,
6725 + struct pfm_event_set *set,
6726 + struct pfarg_pmc *req)
6727 +{
6728 + struct pfm_arch_context *ctx_arch;
6729 + u64 val32 = 0, val38 = 0, val41 = 0;
6730 + u64 tmpval;
6731 + u16 cnum;
6732 + int ret = 0, check_case1 = 0;
6733 + int is_system;
6734 +
6735 + tmpval = req->reg_value;
6736 + cnum = req->reg_num;
6737 + ctx_arch = pfm_ctx_arch(ctx);
6738 + is_system = ctx->flags.system;
6739 +
6740 +#define PFM_MONT_PMC_PM_POS6 (1UL<<6)
6741 +#define PFM_MONT_PMC_PM_POS4 (1UL<<4)
6742 +
6743 + switch (cnum) {
6744 + case 4:
6745 + case 5:
6746 + case 6:
6747 + case 7:
6748 + case 8:
6749 + case 9:
6750 + if (is_system)
6751 + tmpval |= PFM_MONT_PMC_PM_POS6;
6752 + else
6753 + tmpval &= ~PFM_MONT_PMC_PM_POS6;
6754 + break;
6755 + case 10:
6756 + case 11:
6757 + case 12:
6758 + case 13:
6759 + case 14:
6760 + case 15:
6761 + if ((req->reg_flags & PFM_REGFL_NO_EMUL64) == 0) {
6762 + if (pfm_mont_has_ht) {
6763 + PFM_INFO("perfmon: Errata 121 PMD10/PMD15 cannot be used to overflow "
6764 + "when threads are on");
6765 + return -EINVAL;
6766 + }
6767 + }
6768 + if (is_system)
6769 + tmpval |= PFM_MONT_PMC_PM_POS6;
6770 + else
6771 + tmpval &= ~PFM_MONT_PMC_PM_POS6;
6772 + break;
6773 + case 39:
6774 + case 40:
6775 + case 42:
6776 + if (pfm_mont_has_ht && ((req->reg_value >> 8) & 0x7) == 4) {
6777 + PFM_INFO("perfmon: Errata 120: IP-EAR not available when threads are on");
6778 + return -EINVAL;
6779 + }
6780 + if (is_system)
6781 + tmpval |= PFM_MONT_PMC_PM_POS6;
6782 + else
6783 + tmpval &= ~PFM_MONT_PMC_PM_POS6;
6784 + break;
6785 +
6786 + case 32:
6787 + val32 = tmpval;
6788 + val38 = set->pmcs[38];
6789 + val41 = set->pmcs[41];
6790 + check_case1 = 1;
6791 + break;
6792 +
6793 + case 37:
6794 + if (is_system)
6795 + tmpval |= PFM_MONT_PMC_PM_POS4;
6796 + else
6797 + tmpval &= ~PFM_MONT_PMC_PM_POS4;
6798 + break;
6799 +
6800 + case 38:
6801 + val38 = tmpval;
6802 + val32 = set->pmcs[32];
6803 + val41 = set->pmcs[41];
6804 + check_case1 = 1;
6805 + break;
6806 + case 41:
6807 + val41 = tmpval;
6808 + val32 = set->pmcs[32];
6809 + val38 = set->pmcs[38];
6810 + check_case1 = 1;
6811 + break;
6812 + }
6813 +
6814 + if (check_case1) {
6815 + ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
6816 + && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
6817 + || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
6818 + if (ret) {
6819 + PFM_DBG("perfmon: invalid config pmc38=0x%lx "
6820 + "pmc41=0x%lx pmc32=0x%lx",
6821 + val38, val41, val32);
6822 + return -EINVAL;
6823 + }
6824 + }
6825 +
6826 + /*
6827 + * check if configuration implicitly activates the use of the
6828 + * debug registers. If true, then we ensure that this is possible
6829 + * and that we do not pick up stale value in the HW registers.
6830 + */
6831 +
6832 + /*
6833 + *
6834 + * pmc41 is "active" if:
6835 + * one of the pmc41.cfgdtagXX field is different from 0x3
6836 + * AND
6837 + * the corresponding pmc41.en_dbrpXX is set.
6838 + * AND
6839 + * ctx_arch->flags.use_dbr is 0 (dbr not yet used)
6840 + */
6841 + if (cnum == 41
6842 + && (tmpval & 0x1e00000000000UL)
6843 + && (tmpval & 0x18181818UL) != 0x18181818UL
6844 + && ctx_arch->flags.use_dbr == 0) {
6845 + PFM_DBG("pmc41=0x%lx active, clearing dbr", tmpval);
6846 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6847 + if (ret)
6848 + return ret;
6849 + }
6850 + /*
6851 + * we must clear the (instruction) debug registers if:
6852 + * pmc38.ig_ibrpX is 0 (enabled)
6853 + * and
6854 + * ctx_arch->flags.use_dbr == 0 (dbr not yet used)
6855 + */
6856 + if (cnum == 38 && ((tmpval & 0x492) != 0x492)
6857 + && ctx_arch->flags.use_dbr == 0) {
6858 + PFM_DBG("pmc38=0x%lx active pmc38, clearing ibr", tmpval);
6859 + ret = pfm_ia64_mark_dbregs_used(ctx, set);
6860 + if (ret)
6861 + return ret;
6862 +
6863 + }
6864 + req->reg_value = tmpval;
6865 + return 0;
6866 +}
6867 +/* Set pfm_mont_has_ht and log the errata 120 and 121 workarounds */
6868 +static void pfm_handle_errata(void)
6869 +{
6870 + pfm_mont_has_ht = 1;
6871 +
6872 + PFM_INFO("activating workaround for errata 120 "
6873 + "(Disable IP-EAR when threads are on)");
6874 +
6875 + PFM_INFO("activating workaround for Errata 121 "
6876 + "(PMC10-PMC15 cannot be used to overflow"
6877 + " when threads are on)");
6878 +}
6879 +static int pfm_mont_probe_pmu(void)
6880 +{
6881 + if (local_cpu_data->family != 0x20)
6882 + return -1;
6883 +
6884 + /*
6885 + * both errata workarounds (120 and 121) must be switched on
6886 + * whenever threads are, or can be, enabled
6887 + */
6888 + if (is_multithreading_enabled())
6889 + pfm_handle_errata();
6890 +
6891 + return 0;
6892 +}
6893 +
6894 +/*
6895 + * impl_pmcs/impl_pmds are left out: computed at runtime to minimize errors.
6896 + */
6897 +static struct pfm_pmu_config pfm_mont_pmu_conf = {
6898 + .owner = THIS_MODULE,
6899 + .pmu_name = "Montecito",
6900 + .version = "1.0",
6901 + .flags = PFM_PMU_BUILTIN_FLAG,
6902 + .counter_width = 47,
6903 + .pmc_desc = pfm_mont_pmc_desc,
6904 + .num_pmc_entries = PFM_MONT_NUM_PMCS,
6905 + .pmd_desc = pfm_mont_pmd_desc,
6906 + .num_pmd_entries = PFM_MONT_NUM_PMDS,
6907 + .pmu_info = &pfm_mont_pmu_info,
6908 + .probe_pmu = pfm_mont_probe_pmu,
6909 + .pmc_write_check = pfm_mont_pmc_check,
6910 +};
6911 +/* Module init: hand the Montecito PMU description to pfm_pmu_register() */
6912 +static int __init pfm_mont_pmu_init_module(void)
6913 +{
6914 + return pfm_pmu_register(&pfm_mont_pmu_conf);
6915 +}
6916 +/* Module exit: withdraw the Montecito description via pfm_pmu_unregister() */
6917 +static void __exit pfm_mont_pmu_cleanup_module(void)
6918 +{
6919 + pfm_pmu_unregister(&pfm_mont_pmu_conf);
6920 +}
6921 +/* Run the register/unregister helpers at module load and unload */
6922 +module_init(pfm_mont_pmu_init_module);
6923 +module_exit(pfm_mont_pmu_cleanup_module);
6924 diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
6925 index 1e06d23..b87f445 100644
6926 --- a/arch/mips/Kconfig
6927 +++ b/arch/mips/Kconfig
6928 @@ -1857,6 +1857,8 @@ config SECCOMP
6929
6930 If unsure, say Y. Only embedded should say N here.
6931
6932 +source "arch/mips/perfmon/Kconfig"
6933 +
6934 endmenu
6935
6936 config RWSEM_GENERIC_SPINLOCK
6937 diff --git a/arch/mips/Makefile b/arch/mips/Makefile
6938 index 9aab51c..712acf7 100644
6939 --- a/arch/mips/Makefile
6940 +++ b/arch/mips/Makefile
6941 @@ -154,6 +154,12 @@ endif
6942 endif
6943
6944 #
6945 +# Perfmon support
6946 +#
6947 +
6948 +core-$(CONFIG_PERFMON) += arch/mips/perfmon/
6949 +
6950 +#
6951 # Firmware support
6952 #
6953 libs-$(CONFIG_ARC) += arch/mips/fw/arc/
6954 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
6955 index 22fc19b..4467361 100644
6956 --- a/arch/mips/kernel/process.c
6957 +++ b/arch/mips/kernel/process.c
6958 @@ -27,6 +27,7 @@
6959 #include <linux/completion.h>
6960 #include <linux/kallsyms.h>
6961 #include <linux/random.h>
6962 +#include <linux/perfmon_kern.h>
6963
6964 #include <asm/asm.h>
6965 #include <asm/bootinfo.h>
6966 @@ -94,6 +95,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
6967
6968 void exit_thread(void)
6969 {
6970 + pfm_exit_thread();
6971 }
6972
6973 void flush_thread(void)
6974 @@ -162,6 +164,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
6975 if (clone_flags & CLONE_SETTLS)
6976 ti->tp_value = regs->regs[7];
6977
6978 + pfm_copy_thread(p);
6979 +
6980 return 0;
6981 }
6982
6983 diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
6984 index 5e75a31..e96ddd6 100644
6985 --- a/arch/mips/kernel/scall32-o32.S
6986 +++ b/arch/mips/kernel/scall32-o32.S
6987 @@ -653,6 +653,18 @@ einval: li v0, -EINVAL
6988 sys sys_dup3 3
6989 sys sys_pipe2 2
6990 sys sys_inotify_init1 1
6991 + sys sys_pfm_create_context 4 /* 4330 */
6992 + sys sys_pfm_write_pmcs 3
6993 + sys sys_pfm_write_pmds 4
6994 + sys sys_pfm_read_pmds 3
6995 + sys sys_pfm_load_context 2
6996 + sys sys_pfm_start 2 /* 4335 */
6997 + sys sys_pfm_stop 1
6998 + sys sys_pfm_restart 1
6999 + sys sys_pfm_create_evtsets 3
7000 + sys sys_pfm_getinfo_evtsets 3
7001 + sys sys_pfm_delete_evtsets 3 /* 4340 */
7002 + sys sys_pfm_unload_context 1
7003 .endm
7004
7005 /* We pre-compute the number of _instruction_ bytes needed to
7006 diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
7007 index 3d58204..adb2ba9 100644
7008 --- a/arch/mips/kernel/scall64-64.S
7009 +++ b/arch/mips/kernel/scall64-64.S
7010 @@ -487,4 +487,16 @@ sys_call_table:
7011 PTR sys_dup3
7012 PTR sys_pipe2
7013 PTR sys_inotify_init1
7014 + PTR sys_pfm_create_context
7015 + PTR sys_pfm_write_pmcs /* 5290 */
7016 + PTR sys_pfm_write_pmds
7017 + PTR sys_pfm_read_pmds
7018 + PTR sys_pfm_load_context
7019 + PTR sys_pfm_start
7020 + PTR sys_pfm_stop /* 5295 */
7021 + PTR sys_pfm_restart
7022 + PTR sys_pfm_create_evtsets
7023 + PTR sys_pfm_getinfo_evtsets
7024 + PTR sys_pfm_delete_evtsets
7025 + PTR sys_pfm_unload_context /* 5300 */
7026 .size sys_call_table,.-sys_call_table
7027 diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
7028 index da7f1b6..6d12095 100644
7029 --- a/arch/mips/kernel/scall64-n32.S
7030 +++ b/arch/mips/kernel/scall64-n32.S
7031 @@ -400,12 +400,12 @@ EXPORT(sysn32_call_table)
7032 PTR sys_ioprio_set
7033 PTR sys_ioprio_get
7034 PTR compat_sys_utimensat
7035 - PTR compat_sys_signalfd /* 5280 */
7036 + PTR compat_sys_signalfd /* 6280 */
7037 PTR sys_ni_syscall
7038 PTR sys_eventfd
7039 PTR sys_fallocate
7040 PTR sys_timerfd_create
7041 - PTR sys_timerfd_gettime /* 5285 */
7042 + PTR sys_timerfd_gettime /* 6285 */
7043 PTR sys_timerfd_settime
7044 PTR sys_signalfd4
7045 PTR sys_eventfd2
7046 @@ -413,4 +413,16 @@ EXPORT(sysn32_call_table)
7047 PTR sys_dup3 /* 5290 */
7048 PTR sys_pipe2
7049 PTR sys_inotify_init1
7050 + PTR sys_pfm_create_context
7051 + PTR sys_pfm_write_pmcs
7052 + PTR sys_pfm_write_pmds /* 6295 */
7053 + PTR sys_pfm_read_pmds
7054 + PTR sys_pfm_load_context
7055 + PTR sys_pfm_start
7056 + PTR sys_pfm_stop
7057 + PTR sys_pfm_restart /* 6300 */
7058 + PTR sys_pfm_create_evtsets
7059 + PTR sys_pfm_getinfo_evtsets
7060 + PTR sys_pfm_delete_evtsets
7061 + PTR sys_pfm_unload_context
7062 .size sysn32_call_table,.-sysn32_call_table
7063 diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
7064 index d7cd1aa..e77f55a 100644
7065 --- a/arch/mips/kernel/scall64-o32.S
7066 +++ b/arch/mips/kernel/scall64-o32.S
7067 @@ -535,4 +535,16 @@ sys_call_table:
7068 PTR sys_dup3
7069 PTR sys_pipe2
7070 PTR sys_inotify_init1
7071 + PTR sys_pfm_create_context /* 4330 */
7072 + PTR sys_pfm_write_pmcs
7073 + PTR sys_pfm_write_pmds
7074 + PTR sys_pfm_read_pmds
7075 + PTR sys_pfm_load_context
7076 + PTR sys_pfm_start /* 4335 */
7077 + PTR sys_pfm_stop
7078 + PTR sys_pfm_restart
7079 + PTR sys_pfm_create_evtsets
7080 + PTR sys_pfm_getinfo_evtsets
7081 + PTR sys_pfm_delete_evtsets /* 4340 */
7082 + PTR sys_pfm_unload_context
7083 .size sys_call_table,.-sys_call_table
7084 diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
7085 index a4e106c..6a7e60c 100644
7086 --- a/arch/mips/kernel/signal.c
7087 +++ b/arch/mips/kernel/signal.c
7088 @@ -20,6 +20,7 @@
7089 #include <linux/unistd.h>
7090 #include <linux/compiler.h>
7091 #include <linux/uaccess.h>
7092 +#include <linux/perfmon_kern.h>
7093
7094 #include <asm/abi.h>
7095 #include <asm/asm.h>
7096 @@ -694,8 +695,11 @@ static void do_signal(struct pt_regs *regs)
7097 * - triggered by the TIF_WORK_MASK flags
7098 */
7099 asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
7100 - __u32 thread_info_flags)
7101 + __u32 thread_info_flags)
7102 {
7103 + if (thread_info_flags & _TIF_PERFMON_WORK)
7104 + pfm_handle_work(regs);
7105 +
7106 /* deal with pending signal delivery */
7107 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
7108 do_signal(regs);
7109 diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
7110 index 1f467d5..163dfe4 100644
7111 --- a/arch/mips/kernel/time.c
7112 +++ b/arch/mips/kernel/time.c
7113 @@ -49,10 +49,11 @@ int update_persistent_clock(struct timespec now)
7114 return rtc_mips_set_mmss(now.tv_sec);
7115 }
7116
7117 -static int null_perf_irq(void)
7118 +int null_perf_irq(void)
7119 {
7120 return 0;
7121 }
7122 +EXPORT_SYMBOL(null_perf_irq);
7123
7124 int (*perf_irq)(void) = null_perf_irq;
7125
7126 diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
7127 index b602ac6..9cbd75f 100644
7128 --- a/arch/mips/kernel/traps.c
7129 +++ b/arch/mips/kernel/traps.c
7130 @@ -92,17 +92,15 @@ static void show_raw_backtrace(unsigned long reg29)
7131 #ifdef CONFIG_KALLSYMS
7132 printk("\n");
7133 #endif
7134 - while (!kstack_end(sp)) {
7135 - unsigned long __user *p =
7136 - (unsigned long __user *)(unsigned long)sp++;
7137 - if (__get_user(addr, p)) {
7138 - printk(" (Bad stack address)");
7139 - break;
7140 +#define IS_KVA01(a) ((((unsigned long)a) & 0xc0000000) == 0x80000000)
7141 + if (IS_KVA01(sp)) {
7142 + while (!kstack_end(sp)) {
7143 + addr = *sp++;
7144 + if (__kernel_text_address(addr))
7145 + print_ip_sym(addr);
7146 }
7147 - if (__kernel_text_address(addr))
7148 - print_ip_sym(addr);
7149 + printk("\n");
7150 }
7151 - printk("\n");
7152 }
7153
7154 #ifdef CONFIG_KALLSYMS
7155 diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
7156 index 0b97d47..d8f36b5 100644
7157 --- a/arch/mips/mti-malta/malta-time.c
7158 +++ b/arch/mips/mti-malta/malta-time.c
7159 @@ -27,6 +27,7 @@
7160 #include <linux/time.h>
7161 #include <linux/timex.h>
7162 #include <linux/mc146818rtc.h>
7163 +#include <linux/perfmon_kern.h>
7164
7165 #include <asm/mipsregs.h>
7166 #include <asm/mipsmtregs.h>
7167 diff --git a/arch/mips/perfmon/Kconfig b/arch/mips/perfmon/Kconfig
7168 new file mode 100644
7169 index 0000000..b426eea
7170 --- /dev/null
7171 +++ b/arch/mips/perfmon/Kconfig
7172 @@ -0,0 +1,61 @@
7173 +menu "Hardware Performance Monitoring support"
7174 +config PERFMON
7175 + bool "Perfmon2 performance monitoring interface"
7176 + default n
7177 + help
7178 + Enables the perfmon2 interface to access the hardware
7179 + performance counters. See <http://perfmon2.sf.net/> for
7180 + more details.
7181 +
7182 +config PERFMON_DEBUG
7183 + bool "Perfmon debugging"
7184 + default n
7185 + depends on PERFMON
7186 + help
7187 + Enables perfmon debugging support
7188 +
7189 +config PERFMON_DEBUG_FS
7190 + bool "Enable perfmon statistics reporting via debugfs"
7191 + default y
7192 + depends on PERFMON && DEBUG_FS
7193 + help
7194 + Enable collection and reporting of perfmon timing statistics under
7195 + debugfs. This is used for debugging and performance analysis of the
7196 + subsystem. The debugfs filesystem must be mounted.
7197 +
7198 +config PERFMON_FLUSH
7199 + bool "Flush sampling buffer when modified"
7200 + depends on PERFMON
7201 + default n
7202 + help
7203 + On some MIPS models, cache aliasing may cause invalid
7204 + data to be read from the perfmon sampling buffer. Use this option
7205 + to flush the buffer when it is modified to ensure valid data is
7206 + visible at the user level.
7207 +
7208 +config PERFMON_ALIGN
7209 + bool "Align sampling buffer to avoid cache aliasing"
7210 + depends on PERFMON
7211 + default n
7212 + help
7213 + On some MIPS models, cache aliasing may cause invalid
7214 + data to be read from the perfmon sampling buffer. By forcing a bigger
7215 + page alignment (4-page), one can guarantee the buffer virtual address
7216 + will conflict in the cache with the user level mapping of the buffer
7217 + thereby ensuring a consistent view by user programs.
7218 +
7219 +config PERFMON_DEBUG
7220 + bool "Perfmon debugging"
7221 + depends on PERFMON
7222 + default n
7223 + depends on PERFMON
7224 + help
7225 + Enables perfmon debugging support
7226 +
7227 +config PERFMON_MIPS64
7228 + tristate "Support for MIPS64 hardware performance counters"
7229 + depends on PERFMON
7230 + default n
7231 + help
7232 + Enables support for the MIPS64 hardware performance counters
7233 +endmenu
7234 diff --git a/arch/mips/perfmon/Makefile b/arch/mips/perfmon/Makefile
7235 new file mode 100644
7236 index 0000000..153b83f
7237 --- /dev/null
7238 +++ b/arch/mips/perfmon/Makefile
7239 @@ -0,0 +1,2 @@
7240 +obj-$(CONFIG_PERFMON) += perfmon.o
7241 +obj-$(CONFIG_PERFMON_MIPS64) += perfmon_mips64.o
7242 diff --git a/arch/mips/perfmon/perfmon.c b/arch/mips/perfmon/perfmon.c
7243 new file mode 100644
7244 index 0000000..6615a77
7245 --- /dev/null
7246 +++ b/arch/mips/perfmon/perfmon.c
7247 @@ -0,0 +1,313 @@
7248 +/*
7249 + * This file implements the MIPS64 specific
7250 + * support for the perfmon2 interface
7251 + *
7252 + * Copyright (c) 2005 Philip J. Mucci
7253 + *
7254 + * based on versions for other architectures:
7255 + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
7256 + * Contributed by Stephane Eranian <eranian@htrpl.hp.com>
7257 + *
7258 + * This program is free software; you can redistribute it and/or
7259 + * modify it under the terms of version 2 of the GNU General Public
7260 + * License as published by the Free Software Foundation.
7261 + *
7262 + * This program is distributed in the hope that it will be useful,
7263 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7264 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7265 + * General Public License for more details.
7266 + *
7267 + * You should have received a copy of the GNU General Public License
7268 + * along with this program; if not, write to the Free Software
7269 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7270 + * 02111-1307 USA
7271 + */
7272 +#include <linux/interrupt.h>
7273 +#include <linux/module.h>
7274 +#include <linux/perfmon_kern.h>
7275 +
7276 +/*
7277 + * collect pending overflowed PMDs. Called from pfm_ctxsw()
7278 + * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
7279 + * and set->npend_ovfls. Interrupts are masked
7280 + */
7281 +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
7282 +{
7283 + u64 new_val, wmask;
7284 + u64 *used_mask, *intr_pmds;
7285 + u64 mask[PFM_PMD_BV];
7286 + unsigned int i, max;
7287 +
7288 + max = ctx->regs.max_intr_pmd;
7289 + intr_pmds = ctx->regs.intr_pmds;
7290 + used_mask = set->used_pmds;
7291 +
7292 + wmask = 1ULL << pfm_pmu_conf->counter_width;
7293 +
7294 + bitmap_and(cast_ulp(mask),
7295 + cast_ulp(intr_pmds),
7296 + cast_ulp(used_mask),
7297 + max);
7298 +
7299 + /*
7300 + * check all PMD that can generate interrupts
7301 + * (that includes counters)
7302 + */
7303 + for (i = 0; i < max; i++) {
7304 + if (test_bit(i, mask)) {
7305 + new_val = pfm_arch_read_pmd(ctx, i);
7306 +
7307 + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
7308 + i, (unsigned long long)new_val,
7309 + (new_val&wmask) ? 1 : 0);
7310 +
7311 + if (new_val & wmask) {
7312 + __set_bit(i, set->povfl_pmds);
7313 + set->npend_ovfls++;
7314 + }
7315 + }
7316 + }
7317 +}
7318 +
7319 +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
7320 + struct pfm_event_set *set)
7321 +{
7322 + unsigned int i, max;
7323 +
7324 + max = ctx->regs.max_pmc;
7325 +
7326 + /*
7327 + * clear enable bits, assume all pmcs are enable pmcs
7328 + */
7329 + for (i = 0; i < max; i++) {
7330 + if (test_bit(i, set->used_pmcs))
7331 + pfm_arch_write_pmc(ctx, i, 0);
7332 + }
7333 +
7334 + if (set->npend_ovfls)
7335 + return;
7336 +
7337 + __pfm_get_ovfl_pmds(ctx, set);
7338 +}
7339 +
7340 +/*
7341 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
7342 + * Context is locked. Interrupts are masked. Monitoring is active.
7343 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
7344 + *
7345 + * for per-thread:
7346 + * must stop monitoring for the task
7347 + *
7348 + * Return:
7349 + * non-zero : did not save PMDs (as part of stopping the PMU)
7350 + * 0 : saved PMDs (no need to save them in caller)
7351 + */
7352 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
7353 +{
7354 + /*
7355 + * disable lazy restore of PMC registers.
7356 + */
7357 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
7358 +
7359 + /*
7360 + * if masked, monitoring is stopped, thus there is no
7361 + * need to stop the PMU again and there is no need to
7362 + * check for pending overflows. This is not just an
7363 + * optimization, this is also for correctness as you
7364 + * may end up detecting overflows twice.
7365 + */
7366 + if (ctx->state == PFM_CTX_MASKED)
7367 + return 1;
7368 +
7369 + pfm_stop_active(task, ctx, ctx->active_set);
7370 +
7371 + return 1;
7372 +}
7373 +
7374 +/*
7375 + * Called from pfm_stop() and pfm_ctxsw()
7376 + * Interrupts are masked. Context is locked. Set is the active set.
7377 + *
7378 + * For per-thread:
7379 + * task is not necessarily current. If not current task, then
7380 + * task is guaranteed stopped and off any cpu. Access to PMU
7381 + * is not guaranteed. Interrupts are masked. Context is locked.
7382 + * Set is the active set.
7383 + *
7384 + * For system-wide:
7385 + * task is current
7386 + *
7387 + * must disable active monitoring. ctx cannot be NULL
7388 + */
7389 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
7390 +{
7391 + /*
7392 + * no need to go through stop_save()
7393 + * if we are already stopped
7394 + */
7395 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
7396 + return;
7397 +
7398 + /*
7399 + * stop live registers and collect pending overflow
7400 + */
7401 + if (task == current)
7402 + pfm_stop_active(task, ctx, ctx->active_set);
7403 +}
7404 +
7405 +/*
7406 + * called from pfm_start() or pfm_ctxsw() when idle task and
7407 + * EXCL_IDLE is on.
7408 + *
7409 + * Interrupts are masked. Context is locked. Set is the active set.
7410 + *
7411 + * For per-thread:
7412 + * Task is not necessarily current. If not current task, then task
7413 + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
7414 + *
7415 + * For system-wide:
7416 + * task is always current
7417 + *
7418 + * must enable active monitoring.
7419 + */
7420 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
7421 +{
7422 + struct pfm_event_set *set;
7423 + unsigned int i, max_pmc;
7424 +
7425 + if (task != current)
7426 + return;
7427 +
7428 + set = ctx->active_set;
7429 + max_pmc = ctx->regs.max_pmc;
7430 +
7431 + for (i = 0; i < max_pmc; i++) {
7432 + if (test_bit(i, set->used_pmcs))
7433 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
7434 + }
7435 +}
7436 +
7437 +/*
7438 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
7439 + * pfm_context_load_sys(), pfm_ctxsw().
7440 + * context is locked. Interrupts are masked. set cannot be NULL.
7441 + * Access to the PMU is guaranteed.
7442 + *
7443 + * function must restore all PMD registers from set.
7444 + */
7445 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
7446 +{
7447 + u64 ovfl_mask, val;
7448 + u64 *impl_pmds;
7449 + unsigned int i;
7450 + unsigned int max_pmd;
7451 +
7452 + max_pmd = ctx->regs.max_pmd;
7453 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
7454 + impl_pmds = ctx->regs.pmds;
7455 +
7456 + /*
7457 + * must restore all pmds to avoid leaking
7458 + * information to user.
7459 + */
7460 + for (i = 0; i < max_pmd; i++) {
7461 +
7462 + if (test_bit(i, impl_pmds) == 0)
7463 + continue;
7464 +
7465 + val = set->pmds[i].value;
7466 +
7467 + /*
7468 + * keep only the bits selected by ovfl_mask
7469 + * (upper bits are cleared, not set)
7470 + */
7471 + val &= ovfl_mask;
7472 +
7473 + pfm_arch_write_pmd(ctx, i, val);
7474 + }
7475 +}
7476 +
7477 +/*
7478 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
7479 + * pfm_context_load_sys(), pfm_ctxsw().
7480 + * Context is locked. Interrupts are masked. set cannot be NULL.
7481 + * Access to the PMU is guaranteed.
7482 + *
7483 + * function must restore all PMC registers from set, if needed.
7484 + */
7485 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
7486 +{
7487 + u64 *impl_pmcs;
7488 + unsigned int i, max_pmc;
7489 +
7490 + max_pmc = ctx->regs.max_pmc;
7491 + impl_pmcs = ctx->regs.pmcs;
7492 +
7493 + /*
7494 + * - by default no PMCS measures anything
7495 + * - on ctxswout, all used PMCs are disabled (cccr enable bit cleared)
7496 + * hence when masked we do not need to restore anything
7497 + */
7498 + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
7499 + return;
7500 +
7501 + /*
7502 + * restore all pmcs
7503 + */
7504 + for (i = 0; i < max_pmc; i++)
7505 + if (test_bit(i, impl_pmcs))
7506 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
7507 +}
7508 +
7509 +char *pfm_arch_get_pmu_module_name(void)
7510 +{
7511 + switch (cpu_data->cputype) {
7512 +#ifndef CONFIG_SMP
7513 + case CPU_34K:
7514 +#if defined(CPU_74K)
7515 + case CPU_74K:
7516 +#endif
7517 +#endif
7518 + case CPU_SB1:
7519 + case CPU_SB1A:
7520 + case CPU_R12000:
7521 + case CPU_25KF:
7522 + case CPU_24K:
7523 + case CPU_20KC:
7524 + case CPU_5KC:
7525 + return "perfmon_mips64";
7526 + default:
7527 + return NULL;
7528 + }
7529 + return NULL;
7530 +}
7531 +
7532 +int perfmon_perf_irq(void)
7533 +{
7534 + /* BLATANTLY STOLEN FROM OPROFILE, then modified */
7535 + struct pt_regs *regs;
7536 + unsigned int counters = pfm_pmu_conf->regs_all.max_pmc;
7537 + unsigned int control;
7538 + unsigned int counter;
7539 +
7540 + regs = get_irq_regs();
7541 + switch (counters) {
7542 +#define HANDLE_COUNTER(n) \
7543 + case n + 1: \
7544 + control = read_c0_perfctrl ## n(); \
7545 + counter = read_c0_perfcntr ## n(); \
7546 + if ((control & MIPS64_PMC_INT_ENABLE_MASK) && \
7547 + (counter & MIPS64_PMD_INTERRUPT)) { \
7548 + pfm_interrupt_handler(instruction_pointer(regs),\
7549 + regs); \
7550 + return(1); \
7551 + }
7552 + HANDLE_COUNTER(3)
7553 + HANDLE_COUNTER(2)
7554 + HANDLE_COUNTER(1)
7555 + HANDLE_COUNTER(0)
7556 + }
7557 +
7558 + return 0;
7559 +}
7560 +EXPORT_SYMBOL(perfmon_perf_irq);
7561 diff --git a/arch/mips/perfmon/perfmon_mips64.c b/arch/mips/perfmon/perfmon_mips64.c
7562 new file mode 100644
7563 index 0000000..78cb43d
7564 --- /dev/null
7565 +++ b/arch/mips/perfmon/perfmon_mips64.c
7566 @@ -0,0 +1,218 @@
7567 +/*
7568 + * This file contains the MIPS64 and descendant PMU register description tables
7569 + * and pmc checker used by perfmon.c.
7570 + *
7571 + * Copyright (c) 2005 Philip Mucci
7572 + *
7573 + * Based on perfmon_p6.c:
7574 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
7575 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7576 + *
7577 + * This program is free software; you can redistribute it and/or
7578 + * modify it under the terms of version 2 of the GNU General Public
7579 + * License as published by the Free Software Foundation.
7580 + *
7581 + * This program is distributed in the hope that it will be useful,
7582 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7583 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7584 + * General Public License for more details.
7585 + *
7586 + * You should have received a copy of the GNU General Public License
7587 + * along with this program; if not, write to the Free Software
7588 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7589 + * 02111-1307 USA
7590 + */
7591 +#include <linux/module.h>
7592 +#include <linux/perfmon_kern.h>
7593 +
7594 +MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>");
7595 +MODULE_DESCRIPTION("MIPS64 PMU description tables");
7596 +MODULE_LICENSE("GPL");
7597 +
7598 +/*
7599 + * reserved:
7600 + * - bit 63-9
7601 + * RSVD: reserved bits must be 1
7602 + */
7603 +#define PFM_MIPS64_PMC_RSVD 0xfffffffffffff810ULL
7604 +#define PFM_MIPS64_PMC_VAL (1ULL<<4)
7605 +
7606 +extern int null_perf_irq(void); /* defined in arch/mips/kernel/time.c */
7607 +extern int (*perf_irq)(void); /* hook defined in arch/mips/kernel/time.c */
7608 +extern int perfmon_perf_irq(void); /* defined in arch/mips/perfmon/perfmon.c */
7609 +
7610 +static struct pfm_arch_pmu_info pfm_mips64_pmu_info;
7611 +
7612 +static struct pfm_regmap_desc pfm_mips64_pmc_desc[] = {
7613 +/* pmc0 */ PMC_D(PFM_REG_I64, "CP0_25_0", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 0),
7614 +/* pmc1 */ PMC_D(PFM_REG_I64, "CP0_25_1", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 1),
7615 +/* pmc2 */ PMC_D(PFM_REG_I64, "CP0_25_2", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 2),
7616 +/* pmc3 */ PMC_D(PFM_REG_I64, "CP0_25_3", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 3)
7617 +};
7618 +#define PFM_MIPS64_NUM_PMCS ARRAY_SIZE(pfm_mips64_pmc_desc)
7619 +
7620 +static struct pfm_regmap_desc pfm_mips64_pmd_desc[] = {
7621 +/* pmd0 */ PMD_D(PFM_REG_C, "CP0_25_0", 0),
7622 +/* pmd1 */ PMD_D(PFM_REG_C, "CP0_25_1", 1),
7623 +/* pmd2 */ PMD_D(PFM_REG_C, "CP0_25_2", 2),
7624 +/* pmd3 */ PMD_D(PFM_REG_C, "CP0_25_3", 3)
7625 +};
7626 +#define PFM_MIPS64_NUM_PMDS ARRAY_SIZE(pfm_mips64_pmd_desc)
7627 +
7628 +static int pfm_mips64_probe_pmu(void)
7629 +{
7630 + struct cpuinfo_mips *c = &current_cpu_data;
7631 +
7632 + switch (c->cputype) {
7633 +#ifndef CONFIG_SMP
7634 + case CPU_34K:
7635 +#if defined(CPU_74K)
7636 + case CPU_74K:
7637 +#endif
7638 +#endif
7639 + case CPU_SB1:
7640 + case CPU_SB1A:
7641 + case CPU_R12000:
7642 + case CPU_25KF:
7643 + case CPU_24K:
7644 + case CPU_20KC:
7645 + case CPU_5KC:
7646 + return 0;
7647 + break;
7648 + default:
7649 + PFM_INFO("Unknown cputype 0x%x", c->cputype);
7650 + }
7651 + return -1;
7652 +}
7653 +
7654 +/*
7655 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
7656 + */
7657 +static struct pfm_pmu_config pfm_mips64_pmu_conf = {
7658 + .pmu_name = "MIPS", /* placeholder */
7659 + .counter_width = 31,
7660 + .pmd_desc = pfm_mips64_pmd_desc,
7661 + .pmc_desc = pfm_mips64_pmc_desc,
7662 + .num_pmc_entries = PFM_MIPS64_NUM_PMCS,
7663 + .num_pmd_entries = PFM_MIPS64_NUM_PMDS,
7664 + .probe_pmu = pfm_mips64_probe_pmu,
7665 + .flags = PFM_PMU_BUILTIN_FLAG,
7666 + .owner = THIS_MODULE,
7667 + .pmu_info = &pfm_mips64_pmu_info
7668 +};
7669 +
7670 +static inline int n_counters(void)
7671 +{
7672 + if (!(read_c0_config1() & MIPS64_CONFIG_PMC_MASK))
7673 + return 0;
7674 + if (!(read_c0_perfctrl0() & MIPS64_PMC_CTR_MASK))
7675 + return 1;
7676 + if (!(read_c0_perfctrl1() & MIPS64_PMC_CTR_MASK))
7677 + return 2;
7678 + if (!(read_c0_perfctrl2() & MIPS64_PMC_CTR_MASK))
7679 + return 3;
7680 + return 4;
7681 +}
7682 +
7683 +static int __init pfm_mips64_pmu_init_module(void)
7684 +{
7685 + struct cpuinfo_mips *c = &current_cpu_data;
7686 + int i, ret, num;
7687 + u64 temp_mask;
7688 +
7689 + switch (c->cputype) {
7690 + case CPU_5KC:
7691 + pfm_mips64_pmu_conf.pmu_name = "MIPS5KC";
7692 + break;
7693 + case CPU_R12000:
7694 + pfm_mips64_pmu_conf.pmu_name = "MIPSR12000";
7695 + break;
7696 + case CPU_20KC:
7697 + pfm_mips64_pmu_conf.pmu_name = "MIPS20KC";
7698 + break;
7699 + case CPU_24K:
7700 + pfm_mips64_pmu_conf.pmu_name = "MIPS24K";
7701 + break;
7702 + case CPU_25KF:
7703 + pfm_mips64_pmu_conf.pmu_name = "MIPS25KF";
7704 + break;
7705 + case CPU_SB1:
7706 + pfm_mips64_pmu_conf.pmu_name = "SB1";
7707 + break;
7708 + case CPU_SB1A:
7709 + pfm_mips64_pmu_conf.pmu_name = "SB1A";
7710 + break;
7711 +#ifndef CONFIG_SMP
7712 + case CPU_34K:
7713 + pfm_mips64_pmu_conf.pmu_name = "MIPS34K";
7714 + break;
7715 +#if defined(CPU_74K)
7716 + case CPU_74K:
7717 + pfm_mips64_pmu_conf.pmu_name = "MIPS74K";
7718 + break;
7719 +#endif
7720 +#endif
7721 + default:
7722 + PFM_INFO("Unknown cputype 0x%x", c->cputype);
7723 + return -1;
7724 + }
7725 +
7726 + /* The R14k and older performance counters have to */
7727 + /* be hard-coded, as there is no support for auto-detection */
7728 + if ((c->cputype == CPU_R12000) || (c->cputype == CPU_R14000))
7729 + num = 4;
7730 + else if (c->cputype == CPU_R10000)
7731 + num = 2;
7732 + else
7733 + num = n_counters();
7734 +
7735 + if (num == 0) {
7736 + PFM_INFO("cputype 0x%x has no counters", c->cputype);
7737 + return -1;
7738 + }
7739 + /* mark remaining counters unavailable */
7740 + for (i = num; i < PFM_MIPS64_NUM_PMCS; i++)
7741 + pfm_mips64_pmc_desc[i].type = PFM_REG_NA;
7742 +
7743 + for (i = num; i < PFM_MIPS64_NUM_PMDS; i++)
7744 + pfm_mips64_pmd_desc[i].type = PFM_REG_NA;
7745 +
7746 + /* set the PMC_RSVD mask */
7747 + switch (c->cputype) {
7748 + case CPU_5KC:
7749 + case CPU_R10000:
7750 + case CPU_20KC:
7751 + /* 4-bits for event */
7752 + temp_mask = 0xfffffffffffffe10ULL;
7753 + break;
7754 + case CPU_R12000:
7755 + case CPU_R14000:
7756 + /* 5-bits for event */
7757 + temp_mask = 0xfffffffffffffc10ULL;
7758 + break;
7759 + default:
7760 + /* 6-bits for event */
7761 + temp_mask = 0xfffffffffffff810ULL;
7762 + }
7763 + for (i = 0; i < PFM_MIPS64_NUM_PMCS; i++)
7764 + pfm_mips64_pmc_desc[i].rsvd_msk = temp_mask;
7765 +
7766 + pfm_mips64_pmu_conf.num_pmc_entries = num;
7767 + pfm_mips64_pmu_conf.num_pmd_entries = num;
7768 +
7769 + pfm_mips64_pmu_info.pmu_style = c->cputype;
7770 +
7771 + ret = pfm_pmu_register(&pfm_mips64_pmu_conf);
7772 + if (ret == 0)
7773 + perf_irq = perfmon_perf_irq;
7774 + return ret;
7775 +}
7776 +
7777 +static void __exit pfm_mips64_pmu_cleanup_module(void)
7778 +{
7779 + pfm_pmu_unregister(&pfm_mips64_pmu_conf);
7780 + perf_irq = null_perf_irq;
7781 +}
7782 +
7783 +module_init(pfm_mips64_pmu_init_module);
7784 +module_exit(pfm_mips64_pmu_cleanup_module);
7785 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
7786 index 587da5e..a411389 100644
7787 --- a/arch/powerpc/Kconfig
7788 +++ b/arch/powerpc/Kconfig
7789 @@ -230,6 +230,8 @@ source "init/Kconfig"
7790 source "arch/powerpc/sysdev/Kconfig"
7791 source "arch/powerpc/platforms/Kconfig"
7792
7793 +source "arch/powerpc/perfmon/Kconfig"
7794 +
7795 menu "Kernel options"
7796
7797 config HIGHMEM
7798 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
7799 index c6be19e..7ea20cb 100644
7800 --- a/arch/powerpc/Makefile
7801 +++ b/arch/powerpc/Makefile
7802 @@ -146,6 +146,7 @@ core-y += arch/powerpc/kernel/ \
7803 arch/powerpc/platforms/
7804 core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/
7805 core-$(CONFIG_XMON) += arch/powerpc/xmon/
7806 +core-$(CONFIG_PERFMON) += arch/powerpc/perfmon/
7807 core-$(CONFIG_KVM) += arch/powerpc/kvm/
7808
7809 drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
7810 diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
7811 index 5ab7d7f..88cb533 100644
7812 --- a/arch/powerpc/include/asm/Kbuild
7813 +++ b/arch/powerpc/include/asm/Kbuild
7814 @@ -21,6 +21,7 @@ header-y += resource.h
7815 header-y += sigcontext.h
7816 header-y += statfs.h
7817 header-y += ps3fb.h
7818 +header-y += perfmon.h
7819
7820 unifdef-y += bootx.h
7821 unifdef-y += byteorder.h
7822 diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
7823 index 8066eed..981db26 100644
7824 --- a/arch/powerpc/include/asm/cell-pmu.h
7825 +++ b/arch/powerpc/include/asm/cell-pmu.h
7826 @@ -61,6 +61,11 @@
7827
7828 /* Macros for the pm_status register. */
7829 #define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7)))
7830 +#define CBE_PM_OVERFLOW_CTRS(pm_status) (((pm_status) >> 24) & 0xff)
7831 +#define CBE_PM_ALL_OVERFLOW_INTR 0xff000000
7832 +#define CBE_PM_INTERVAL_INTR 0x00800000
7833 +#define CBE_PM_TRACE_BUFFER_FULL_INTR 0x00400000
7834 +#define CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR 0x00200000
7835
7836 enum pm_reg_name {
7837 group_control,
7838 diff --git a/arch/powerpc/include/asm/cell-regs.h b/arch/powerpc/include/asm/cell-regs.h
7839 index fd6fd00..580786d 100644
7840 --- a/arch/powerpc/include/asm/cell-regs.h
7841 +++ b/arch/powerpc/include/asm/cell-regs.h
7842 @@ -117,8 +117,9 @@ struct cbe_pmd_regs {
7843 u8 pad_0x0c1c_0x0c20 [4]; /* 0x0c1c */
7844 #define CBE_PMD_FIR_MODE_M8 0x00800
7845 u64 fir_enable_mask; /* 0x0c20 */
7846 -
7847 - u8 pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28]; /* 0x0c28 */
7848 + u8 pad_0x0c28_0x0c98 [0x0c98 - 0x0c28]; /* 0x0c28 */
7849 + u64 on_ramp_trace; /* 0x0c98 */
7850 + u64 pad_0x0ca0; /* 0x0ca0 */
7851 u64 ras_esc_0; /* 0x0ca8 */
7852 u8 pad_0x0cb0_0x1000 [0x1000 - 0x0cb0]; /* 0x0cb0 */
7853 };
7854 @@ -218,7 +219,11 @@ extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu);
7855
7856
7857 struct cbe_mic_tm_regs {
7858 - u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */
7859 + u8 pad_0x0000_0x0010[0x0010 - 0x0000]; /* 0x0000 */
7860 +
7861 + u64 MBL_debug; /* 0x0010 */
7862 +
7863 + u8 pad_0x0018_0x0040[0x0040 - 0x0018]; /* 0x0018 */
7864
7865 u64 mic_ctl_cnfg2; /* 0x0040 */
7866 #define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL
7867 @@ -303,6 +308,25 @@ struct cbe_mic_tm_regs {
7868 extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np);
7869 extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
7870
7871 +/*
7872 + *
7873 + * PPE Privileged MMIO Registers definition. (offset 0x500000 - 0x500fff)
7874 + *
7875 + */
7876 +struct cbe_ppe_priv_regs {
7877 + u8 pad_0x0000_0x0858[0x0858 - 0x0000]; /* 0x0000 */
7878 +
7879 + u64 L2_debug1; /* 0x0858 */
7880 +
7881 + u8 pad_0x0860_0x0958[0x0958 - 0x0860]; /* 0x0860 */
7882 +
7883 + u64 ciu_dr1; /* 0x0958 */
7884 +
7885 + u8 pad_0x0960_0x1000[0x1000 - 0x0960]; /* 0x0960 */
7886 +};
7887 +
7888 +extern struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu);
7889 +
7890 /* some utility functions to deal with SMT */
7891 extern u32 cbe_get_hw_thread_id(int cpu);
7892 extern u32 cbe_cpu_to_node(int cpu);
7893 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
7894 index 6493a39..ba9ead4 100644
7895 --- a/arch/powerpc/include/asm/paca.h
7896 +++ b/arch/powerpc/include/asm/paca.h
7897 @@ -97,6 +97,10 @@ struct paca_struct {
7898 u8 soft_enabled; /* irq soft-enable flag */
7899 u8 hard_enabled; /* set if irqs are enabled in MSR */
7900 u8 io_sync; /* writel() needs spin_unlock sync */
7901 +#ifdef CONFIG_PERFMON
7902 + u8 pmu_except_pending; /* PMU exception occurred while soft
7903 + * disabled */
7904 +#endif
7905
7906 /* Stuff for accurate time accounting */
7907 u64 user_time; /* accumulated usermode TB ticks */
7908 diff --git a/arch/powerpc/include/asm/perfmon.h b/arch/powerpc/include/asm/perfmon.h
7909 new file mode 100644
7910 index 0000000..da0ae3b
7911 --- /dev/null
7912 +++ b/arch/powerpc/include/asm/perfmon.h
7913 @@ -0,0 +1,33 @@
7914 +/*
7915 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
7916 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7917 + *
7918 + * This file contains powerpc specific definitions for the perfmon
7919 + * interface.
7920 + *
7921 + * This file MUST never be included directly. Use linux/perfmon.h.
7922 + *
7923 + * This program is free software; you can redistribute it and/or
7924 + * modify it under the terms of version 2 of the GNU General Public
7925 + * License as published by the Free Software Foundation.
7926 + *
7927 + * This program is distributed in the hope that it will be useful,
7928 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7929 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7930 + * General Public License for more details.
7931 + *
7932 + * You should have received a copy of the GNU General Public License
7933 + * along with this program; if not, write to the Free Software
7934 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7935 + * 02111-1307 USA
7936 + */
7937 +#ifndef _ASM_POWERPC_PERFMON_H_
7938 +#define _ASM_POWERPC_PERFMON_H_
7939 +
7940 +/*
7941 + * arch-specific user visible interface definitions
7942 + */
7943 +#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
7944 +#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
7945 +
7946 +#endif /* _ASM_POWERPC_PERFMON_H_ */
7947 diff --git a/arch/powerpc/include/asm/perfmon_kern.h b/arch/powerpc/include/asm/perfmon_kern.h
7948 new file mode 100644
7949 index 0000000..65ec984
7950 --- /dev/null
7951 +++ b/arch/powerpc/include/asm/perfmon_kern.h
7952 @@ -0,0 +1,390 @@
7953 +/*
7954 + * Copyright (c) 2005 David Gibson, IBM Corporation.
7955 + *
7956 + * Based on other versions:
7957 + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
7958 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
7959 + *
7960 + * This file contains powerpc specific definitions for the perfmon
7961 + * interface.
7962 + *
7963 + * This program is free software; you can redistribute it and/or
7964 + * modify it under the terms of version 2 of the GNU General Public
7965 + * License as published by the Free Software Foundation.
7966 + *
7967 + * This program is distributed in the hope that it will be useful,
7968 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
7969 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7970 + * General Public License for more details.
7971 + *
7972 + * You should have received a copy of the GNU General Public License
7973 + * along with this program; if not, write to the Free Software
7974 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
7975 + * 02111-1307 USA
7976 + */
7977 +#ifndef _ASM_POWERPC_PERFMON_KERN_H_
7978 +#define _ASM_POWERPC_PERFMON_KERN_H_
7979 +
7980 +#ifdef __KERNEL__
7981 +
7982 +#ifdef CONFIG_PERFMON
7983 +
7984 +#include <asm/pmc.h>
7985 +#include <asm/unistd.h>
7986 +
7987 +#define HID0_PMC5_6_GR_MODE (1UL << (63 - 40))
7988 +
7989 +enum powerpc_pmu_type {
7990 + PFM_POWERPC_PMU_NONE,
7991 + PFM_POWERPC_PMU_604,
7992 + PFM_POWERPC_PMU_604e,
7993 + PFM_POWERPC_PMU_750, /* XXX: Minor event set diffs between IBM and Moto. */
7994 + PFM_POWERPC_PMU_7400,
7995 + PFM_POWERPC_PMU_7450,
7996 + PFM_POWERPC_PMU_POWER4,
7997 + PFM_POWERPC_PMU_POWER5,
7998 + PFM_POWERPC_PMU_POWER5p,
7999 + PFM_POWERPC_PMU_POWER6,
8000 + PFM_POWERPC_PMU_CELL,
8001 +};
8002 +
8003 +struct pfm_arch_pmu_info {
8004 + enum powerpc_pmu_type pmu_style;
8005 +
8006 + void (*write_pmc)(unsigned int cnum, u64 value);
8007 + void (*write_pmd)(unsigned int cnum, u64 value);
8008 +
8009 + u64 (*read_pmd)(unsigned int cnum);
8010 +
8011 + void (*enable_counters)(struct pfm_context *ctx,
8012 + struct pfm_event_set *set);
8013 + void (*disable_counters)(struct pfm_context *ctx,
8014 + struct pfm_event_set *set);
8015 +
8016 + void (*irq_handler)(struct pt_regs *regs, struct pfm_context *ctx);
8017 + void (*get_ovfl_pmds)(struct pfm_context *ctx,
8018 + struct pfm_event_set *set);
8019 +
8020 + /* The following routines are optional. */
8021 + void (*restore_pmcs)(struct pfm_context *ctx,
8022 + struct pfm_event_set *set);
8023 + void (*restore_pmds)(struct pfm_context *ctx,
8024 + struct pfm_event_set *set);
8025 +
8026 + int (*ctxswout_thread)(struct task_struct *task,
8027 + struct pfm_context *ctx,
8028 + struct pfm_event_set *set);
8029 + void (*ctxswin_thread)(struct task_struct *task,
8030 + struct pfm_context *ctx,
8031 + struct pfm_event_set *set);
8032 + int (*load_context)(struct pfm_context *ctx);
8033 + void (*unload_context)(struct pfm_context *ctx);
8034 + int (*acquire_pmu)(u64 *unavail_pmcs, u64 *unavail_pmds);
8035 + void (*release_pmu)(void);
8036 + void *platform_info;
8037 + void (*resend_irq)(struct pfm_context *ctx);
8038 +};
8039 +
8040 +#ifdef CONFIG_PPC32
8041 +#define PFM_ARCH_PMD_STK_ARG 6 /* conservative value */
8042 +#define PFM_ARCH_PMC_STK_ARG 6 /* conservative value */
8043 +#else
8044 +#define PFM_ARCH_PMD_STK_ARG 8 /* conservative value */
8045 +#define PFM_ARCH_PMC_STK_ARG 8 /* conservative value */
8046 +#endif
8047 +
8048 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
8049 +{
8050 + struct pfm_arch_pmu_info *arch_info;
8051 +
8052 + arch_info = pfm_pmu_info();
8053 + arch_info->resend_irq(ctx);
8054 +}
8055 +
8056 +static inline void pfm_arch_serialize(void)
8057 +{}
8058 +
8059 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
8060 + unsigned int cnum,
8061 + u64 value)
8062 +{
8063 + struct pfm_arch_pmu_info *arch_info;
8064 +
8065 + arch_info = pfm_pmu_info();
8066 +
8067 + /*
8068 + * we only write to the actual register when monitoring is
8069 + * active (pfm_start was issued)
8070 + */
8071 + if (ctx && ctx->flags.started == 0)
8072 + return;
8073 +
8074 + BUG_ON(!arch_info->write_pmc);
8075 +
8076 + arch_info->write_pmc(cnum, value);
8077 +}
8078 +
8079 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
8080 + unsigned int cnum, u64 value)
8081 +{
8082 + struct pfm_arch_pmu_info *arch_info;
8083 +
8084 + arch_info = pfm_pmu_info();
8085 +
8086 + value &= pfm_pmu_conf->ovfl_mask;
8087 +
8088 + BUG_ON(!arch_info->write_pmd);
8089 +
8090 + arch_info->write_pmd(cnum, value);
8091 +}
8092 +
8093 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
8094 +{
8095 + struct pfm_arch_pmu_info *arch_info;
8096 +
8097 + arch_info = pfm_pmu_info();
8098 +
8099 + BUG_ON(!arch_info->read_pmd);
8100 +
8101 + return arch_info->read_pmd(cnum);
8102 +}
8103 +
8104 +/*
8105 + * For some CPUs, the upper bits of a counter must be set in order for the
8106 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
8107 + * and the upper bits are cleared. This function may be used to set them back.
8108 + */
8109 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
8110 + unsigned int cnum)
8111 +{
8112 + u64 val = pfm_arch_read_pmd(ctx, cnum);
8113 +
8114 + /* This masks out overflow bit 31 */
8115 + pfm_arch_write_pmd(ctx, cnum, val);
8116 +}
8117 +
8118 +/*
8119 + * At certain points, perfmon needs to know if monitoring has been
8120 + * explicitly started/stopped by user via pfm_start/pfm_stop. The
8121 + * information is tracked in flags.started. However on certain
8122 + * architectures, it may be possible to start/stop directly from
8123 + * user level with a single assembly instruction bypassing
8124 + * the kernel. This function must be used to determine by
8125 + * an arch-specific means if monitoring is actually started/stopped.
8126 + */
8127 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
8128 +{
8129 + return ctx->flags.started;
8130 +}
8131 +
8132 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
8133 + struct pfm_context *ctx)
8134 +{}
8135 +
8136 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
8137 + struct pfm_context *ctx)
8138 +{}
8139 +
8140 +void pfm_arch_init_percpu(void);
8141 +int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
8142 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
8143 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
8144 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
8145 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
8146 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
8147 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
8148 +void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, struct pfm_event_set *set);
8149 +int pfm_arch_get_ovfl_pmds(struct pfm_context *ctx,
8150 + struct pfm_event_set *set);
8151 +char *pfm_arch_get_pmu_module_name(void);
8152 +/*
8153 + * called from __pfm_interrupt_handler(). ctx is not NULL.
8154 + * ctx is locked. PMU interrupt is masked.
8155 + *
8156 + * must stop all monitoring to ensure handler has consistent view.
8157 + * must collect overflowed PMDs bitmask into povfls_pmds and
8158 + * npend_ovfls. If no interrupt detected then npend_ovfls
8159 + * must be set to zero.
8160 + */
8161 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, struct pfm_event_set *set)
8162 +{
8163 + pfm_arch_stop(current, ctx);
8164 +}
8165 +
8166 +void powerpc_irq_handler(struct pt_regs *regs);
8167 +
8168 +/*
8169 + * unfreeze PMU from pfm_do_interrupt_handler()
8170 + * ctx may be NULL for spurious interrupts
8171 + */
8172 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
8173 +{
8174 + struct pfm_arch_pmu_info *arch_info;
8175 +
8176 + if (!ctx)
8177 + return;
8178 +
8179 + PFM_DBG_ovfl("state=%d", ctx->state);
8180 +
8181 + ctx->flags.started = 1;
8182 +
8183 + if (ctx->state == PFM_CTX_MASKED)
8184 + return;
8185 +
8186 + arch_info = pfm_pmu_info();
8187 + BUG_ON(!arch_info->enable_counters);
8188 + arch_info->enable_counters(ctx, ctx->active_set);
8189 +}
8190 +
8191 +/*
8192 + * PowerPC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
8193 + * this routine needs to do it when switching sets on overflow
8194 + */
8195 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
8196 + struct pfm_event_set *set)
8197 +{
8198 + pfm_save_pmds(ctx, set);
8199 +}
8200 +
8201 +/*
8202 + * this function is called from the PMU interrupt handler ONLY.
8203 + * On PPC, the PMU is frozen via arch_stop, masking would be implemented
8204 + * via arch-stop as well. Given that the PMU is already stopped when
8205 + * entering the interrupt handler, we do not need to stop it again, so
8206 + * this function is a nop.
8207 + */
8208 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
8209 + struct pfm_event_set *set)
8210 +{}
8211 +
8212 +/*
8213 + * Simply need to start the context in order to unmask.
8214 + */
8215 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
8216 + struct pfm_event_set *set)
8217 +{
8218 + pfm_arch_start(current, ctx);
8219 +}
8220 +
8221 +
8222 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
8223 +{
8224 + return 0;
8225 +}
8226 +
8227 +static inline int pfm_arch_context_create(struct pfm_context *ctx,
8228 + u32 ctx_flags)
8229 +{
8230 + return 0;
8231 +}
8232 +
8233 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
8234 +{}
8235 +
8236 +/* not necessary on PowerPC */
8237 +static inline void pfm_cacheflush(void *addr, unsigned int len)
8238 +{}
8239 +
8240 +/*
8241 + * function called from pfm_setfl_sane(). Context is locked
8242 + * and interrupts are masked.
8243 + * The value of flags is the value of ctx_flags as passed by
8244 + * user.
8245 + *
8246 + * function must check arch-specific set flags.
8247 + * Return:
8248 + * 0 when flags are valid (this stub always returns 0)
8249 + * non-zero on error (TODO: confirm against pfm_setfl_sane())
8250 + */
8251 +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
8252 +{
8253 + return 0;
8254 +}
8255 +
8256 +static inline int pfm_arch_init(void)
8257 +{
8258 + return 0;
8259 +}
8260 +
8261 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
8262 +{
8263 + struct pfm_arch_pmu_info *arch_info;
8264 + int rc = 0;
8265 +
8266 + arch_info = pfm_pmu_info();
8267 + if (arch_info->load_context)
8268 + rc = arch_info->load_context(ctx);
8269 +
8270 + return rc;
8271 +}
8272 +
8273 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
8274 +{
8275 + struct pfm_arch_pmu_info *arch_info;
8276 +
8277 + arch_info = pfm_pmu_info();
8278 + if (arch_info->unload_context)
8279 + arch_info->unload_context(ctx);
8280 +}
8281 +
8282 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
8283 +{
8284 + struct pfm_arch_pmu_info *arch_info;
8285 + int rc = 0;
8286 +
8287 + arch_info = pfm_pmu_info();
8288 + if (arch_info->acquire_pmu) {
8289 + rc = arch_info->acquire_pmu(unavail_pmcs, unavail_pmds);
8290 + if (rc)
8291 + return rc;
8292 + }
8293 +
8294 + return reserve_pmc_hardware(powerpc_irq_handler);
8295 +}
8296 +
8297 +static inline void pfm_arch_pmu_release(void)
8298 +{
8299 + struct pfm_arch_pmu_info *arch_info;
8300 +
8301 + arch_info = pfm_pmu_info();
8302 + if (arch_info->release_pmu)
8303 + arch_info->release_pmu();
8304 +
8305 + release_pmc_hardware();
8306 +}
8307 +
8308 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
8309 +{}
8310 +
8311 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
8312 +{}
8313 +
8314 +static inline int pfm_arch_get_base_syscall(void)
8315 +{
8316 + return __NR_pfm_create_context;
8317 +}
8318 +
8319 +struct pfm_arch_context {
8320 + /* Cell: Most recent value of the pm_status
8321 + * register read by the interrupt handler.
8322 + *
8323 + * Interrupt handler sets last_read_updated if it
8324 + * just read and updated last_read_pm_status
8325 + */
8326 + u32 last_read_pm_status;
8327 + u32 last_read_updated;
8328 + u64 powergs_pmc5, powergs_pmc6;
8329 + u64 delta_tb, delta_tb_start;
8330 + u64 delta_purr, delta_purr_start;
8331 +};
8332 +
8333 +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
8334 +/*
8335 + * PowerPC does not need extra alignment requirements for the sampling buffer
8336 + */
8337 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
8338 +
8339 +#endif /* CONFIG_PERFMON */
8340 +
8341 +#endif /* __KERNEL__ */
8342 +#endif /* _ASM_POWERPC_PERFMON_KERN_H_ */
8343 diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
8344 index c6d1ab6..a9f3ad0 100644
8345 --- a/arch/powerpc/include/asm/reg.h
8346 +++ b/arch/powerpc/include/asm/reg.h
8347 @@ -698,6 +698,7 @@
8348 #define PV_POWER5 0x003A
8349 #define PV_POWER5p 0x003B
8350 #define PV_970FX 0x003C
8351 +#define PV_POWER6 0x003E
8352 #define PV_630 0x0040
8353 #define PV_630p 0x0041
8354 #define PV_970MP 0x0044
8355 diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
8356 index f6cc7a4..0164841 100644
8357 --- a/arch/powerpc/include/asm/systbl.h
8358 +++ b/arch/powerpc/include/asm/systbl.h
8359 @@ -322,3 +322,15 @@ SYSCALL_SPU(epoll_create1)
8360 SYSCALL_SPU(dup3)
8361 SYSCALL_SPU(pipe2)
8362 SYSCALL(inotify_init1)
8363 +SYSCALL(pfm_create_context)
8364 +SYSCALL(pfm_write_pmcs)
8365 +SYSCALL(pfm_write_pmds)
8366 +SYSCALL(pfm_read_pmds)
8367 +SYSCALL(pfm_load_context)
8368 +SYSCALL(pfm_start)
8369 +SYSCALL(pfm_stop)
8370 +SYSCALL(pfm_restart)
8371 +SYSCALL(pfm_create_evtsets)
8372 +SYSCALL(pfm_getinfo_evtsets)
8373 +SYSCALL(pfm_delete_evtsets)
8374 +SYSCALL(pfm_unload_context)
8375 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
8376 index 9665a26..6cda9f9 100644
8377 --- a/arch/powerpc/include/asm/thread_info.h
8378 +++ b/arch/powerpc/include/asm/thread_info.h
8379 @@ -130,10 +130,12 @@ static inline struct thread_info *current_thread_info(void)
8380 #define _TIF_FREEZE (1<<TIF_FREEZE)
8381 #define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
8382 #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
8383 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
8384 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
8385 #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
8386
8387 #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
8388 - _TIF_NOTIFY_RESUME)
8389 + _TIF_NOTIFY_RESUME | _TIF_PERFMON_WORK)
8390 #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
8391
8392 /* Bits in local_flags */
8393 diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
8394 index e07d0c7..6226cba 100644
8395 --- a/arch/powerpc/include/asm/unistd.h
8396 +++ b/arch/powerpc/include/asm/unistd.h
8397 @@ -341,10 +341,22 @@
8398 #define __NR_dup3 316
8399 #define __NR_pipe2 317
8400 #define __NR_inotify_init1 318
8401 +#define __NR_pfm_create_context 319
8402 +#define __NR_pfm_write_pmcs 320
8403 +#define __NR_pfm_write_pmds 321
8404 +#define __NR_pfm_read_pmds 322
8405 +#define __NR_pfm_load_context 323
8406 +#define __NR_pfm_start 324
8407 +#define __NR_pfm_stop 325
8408 +#define __NR_pfm_restart 326
8409 +#define __NR_pfm_create_evtsets 327
8410 +#define __NR_pfm_getinfo_evtsets 328
8411 +#define __NR_pfm_delete_evtsets 329
8412 +#define __NR_pfm_unload_context 330
8413
8414 #ifdef __KERNEL__
8415
8416 -#define __NR_syscalls 319
8417 +#define __NR_syscalls 331
8418
8419 #define __NR__exit __NR_exit
8420 #define NR_syscalls __NR_syscalls
8421 diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
8422 index 1cbbf70..198645f 100644
8423 --- a/arch/powerpc/kernel/entry_32.S
8424 +++ b/arch/powerpc/kernel/entry_32.S
8425 @@ -39,7 +39,7 @@
8426 * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
8427 */
8428 #if MSR_KERNEL >= 0x10000
8429 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
8430 +#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
8431 #else
8432 #define LOAD_MSR_KERNEL(r, x) li r,(x)
8433 #endif
8434 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
8435 index 2d802e9..77a090d 100644
8436 --- a/arch/powerpc/kernel/entry_64.S
8437 +++ b/arch/powerpc/kernel/entry_64.S
8438 @@ -643,6 +643,10 @@ user_work:
8439 b .ret_from_except_lite
8440
8441 1: bl .save_nvgprs
8442 +#ifdef CONFIG_PERFMON
8443 + addi r3,r1,STACK_FRAME_OVERHEAD
8444 + bl .pfm_handle_work
8445 +#endif /* CONFIG_PERFMON */
8446 addi r3,r1,STACK_FRAME_OVERHEAD
8447 bl .do_signal
8448 b .ret_from_except
8449 diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
8450 index d972dec..b255fba 100644
8451 --- a/arch/powerpc/kernel/irq.c
8452 +++ b/arch/powerpc/kernel/irq.c
8453 @@ -104,6 +104,24 @@ static inline notrace void set_soft_enabled(unsigned long enable)
8454 : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
8455 }
8456
8457 +#ifdef CONFIG_PERFMON
8458 +static inline unsigned long get_pmu_except_pending(void)
8459 +{
8460 + unsigned long pending;
8461 +
8462 + __asm__ __volatile__("lbz %0,%1(13)"
8463 + : "=r" (pending) : "i" (offsetof(struct paca_struct, pmu_except_pending)));
8464 +
8465 + return pending;
8466 +}
8467 +
8468 +static inline void set_pmu_except_pending(unsigned long pending)
8469 +{
8470 + __asm__ __volatile__("stb %0,%1(13)"
8471 + : : "r" (pending), "i" (offsetof(struct paca_struct, pmu_except_pending)));
8472 +}
8473 +#endif /* CONFIG_PERFMON */
8474 +
8475 notrace void raw_local_irq_restore(unsigned long en)
8476 {
8477 /*
8478 @@ -162,6 +180,19 @@ notrace void raw_local_irq_restore(unsigned long en)
8479 lv1_get_version_info(&tmp);
8480 }
8481
8482 +#ifdef CONFIG_PERFMON
8483 + /*
8484 + * If a PMU exception occurred while interrupts were soft disabled,
8485 + * force a PMU exception.
8486 + */
8487 + if (get_pmu_except_pending()) {
8488 + set_pmu_except_pending(0);
8489 + /* Make sure we trigger the edge detection circuitry */
8490 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
8491 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
8492 + }
8493 +#endif /* CONFIG_PERFMON */
8494 +
8495 __hard_irq_enable();
8496 }
8497 EXPORT_SYMBOL(raw_local_irq_restore);
8498 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
8499 index 957bded..32dbc8e 100644
8500 --- a/arch/powerpc/kernel/process.c
8501 +++ b/arch/powerpc/kernel/process.c
8502 @@ -33,6 +33,7 @@
8503 #include <linux/mqueue.h>
8504 #include <linux/hardirq.h>
8505 #include <linux/utsname.h>
8506 +#include <linux/perfmon_kern.h>
8507
8508 #include <asm/pgtable.h>
8509 #include <asm/uaccess.h>
8510 @@ -393,9 +394,14 @@ struct task_struct *__switch_to(struct task_struct *prev,
8511 new_thread->start_tb = current_tb;
8512 }
8513 #endif
8514 -
8515 local_irq_save(flags);
8516
8517 + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW))
8518 + pfm_ctxsw_out(prev, new);
8519 +
8520 + if (test_tsk_thread_flag(new, TIF_PERFMON_CTXSW))
8521 + pfm_ctxsw_in(prev, new);
8522 +
8523 account_system_vtime(current);
8524 account_process_vtime(current);
8525 calculate_steal_time();
8526 @@ -544,6 +550,7 @@ void show_regs(struct pt_regs * regs)
8527 void exit_thread(void)
8528 {
8529 discard_lazy_cpu_state();
8530 + pfm_exit_thread();
8531 }
8532
8533 void flush_thread(void)
8534 @@ -669,6 +676,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
8535 #else
8536 kregs->nip = (unsigned long)ret_from_fork;
8537 #endif
8538 + pfm_copy_thread(p);
8539
8540 return 0;
8541 }
8542 diff --git a/arch/powerpc/perfmon/Kconfig b/arch/powerpc/perfmon/Kconfig
8543 new file mode 100644
8544 index 0000000..3f4bbf2
8545 --- /dev/null
8546 +++ b/arch/powerpc/perfmon/Kconfig
8547 @@ -0,0 +1,67 @@
8548 +menu "Hardware Performance Monitoring support"
8549 +config PERFMON
8550 + bool "Perfmon2 performance monitoring interface"
8551 + default n
8552 + help
8553 + Enables the perfmon2 interface to access the hardware
8554 + performance counters. See <http://perfmon2.sf.net/> for
8555 + more details.
8556 +
8557 +config PERFMON_DEBUG
8558 + bool "Perfmon debugging"
8559 + default n
8560 + depends on PERFMON
8561 + help
8562 + Enables perfmon debugging support
8563 +
8564 +config PERFMON_DEBUG_FS
8565 + bool "Enable perfmon statistics reporting via debugfs"
8566 + default y
8567 + depends on PERFMON && DEBUG_FS
8568 + help
8569 + Enable collection and reporting of perfmon timing statistics under
8570 + debugfs. This is used for debugging and performance analysis of the
8571 + subsystem. The debugfs filesystem must be mounted.
8572 +
8573 +config PERFMON_POWER4
8574 + tristate "Support for Power4 hardware performance counters"
8575 + depends on PERFMON && PPC64
8576 + default n
8577 + help
8578 + Enables support for the Power 4 hardware performance counters
8579 + If unsure, say M.
8580 +
8581 +config PERFMON_POWER5
8582 + tristate "Support for Power5 hardware performance counters"
8583 + depends on PERFMON && PPC64
8584 + default n
8585 + help
8586 + Enables support for the Power 5 hardware performance counters
8587 + If unsure, say M.
8588 +
8589 +config PERFMON_POWER6
8590 + tristate "Support for Power6 hardware performance counters"
8591 + depends on PERFMON && PPC64
8592 + default n
8593 + help
8594 + Enables support for the Power 6 hardware performance counters
8595 + If unsure, say M.
8596 +
8597 +config PERFMON_PPC32
8598 + tristate "Support for PPC32 hardware performance counters"
8599 + depends on PERFMON && PPC32
8600 + default n
8601 + help
8602 + Enables support for the PPC32 hardware performance counters
8603 + If unsure, say M.
8604 +
8605 +config PERFMON_CELL
8606 + tristate "Support for Cell hardware performance counters"
8607 + depends on PERFMON && PPC_CELL
8608 + select PS3_LPM if PPC_PS3
8609 + default n
8610 + help
8611 + Enables support for the Cell hardware performance counters.
8612 + If unsure, say M.
8613 +
8614 +endmenu
8615 diff --git a/arch/powerpc/perfmon/Makefile b/arch/powerpc/perfmon/Makefile
8616 new file mode 100644
8617 index 0000000..300661f
8618 --- /dev/null
8619 +++ b/arch/powerpc/perfmon/Makefile
8620 @@ -0,0 +1,6 @@
8621 +obj-$(CONFIG_PERFMON) += perfmon.o
8622 +obj-$(CONFIG_PERFMON_POWER4) += perfmon_power4.o
8623 +obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o
8624 +obj-$(CONFIG_PERFMON_POWER6) += perfmon_power6.o
8625 +obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o
8626 +obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o
8627 diff --git a/arch/powerpc/perfmon/perfmon.c b/arch/powerpc/perfmon/perfmon.c
8628 new file mode 100644
8629 index 0000000..51a8b6a
8630 --- /dev/null
8631 +++ b/arch/powerpc/perfmon/perfmon.c
8632 @@ -0,0 +1,334 @@
8633 +/*
8634 + * This file implements the powerpc specific
8635 + * support for the perfmon2 interface
8636 + *
8637 + * Copyright (c) 2005 David Gibson, IBM Corporation.
8638 + *
8639 + * based on versions for other architectures:
8640 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
8641 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
8642 + *
8643 + * This program is free software; you can redistribute it and/or
8644 + * modify it under the terms of version 2 of the GNU General Public
8645 + * License as published by the Free Software Foundation.
8646 + *
8647 + * This program is distributed in the hope that it will be useful,
8648 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8649 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8650 + * General Public License for more details.
8651 + *
8652 + * You should have received a copy of the GNU General Public License
8653 + * along with this program; if not, write to the Free Software
8654 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
8655 + * 02111-1307 USA
8656 + */
8657 +#include <linux/interrupt.h>
8658 +#include <linux/perfmon_kern.h>
8659 +
8660 +static void pfm_stop_active(struct task_struct *task,
8661 + struct pfm_context *ctx, struct pfm_event_set *set)
8662 +{
8663 + struct pfm_arch_pmu_info *arch_info;
8664 +
8665 + arch_info = pfm_pmu_info();
8666 + BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds);
8667 +
8668 + arch_info->disable_counters(ctx, set);
8669 +
8670 + if (set->npend_ovfls)
8671 + return;
8672 +
8673 + arch_info->get_ovfl_pmds(ctx, set);
8674 +}
8675 +
8676 +/*
8677 + * Called from pfm_save_pmds(). Interrupts are masked. Registers are
8678 + * already saved away.
8679 + */
8680 +void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
8681 + struct pfm_event_set *set)
8682 +{
8683 + int i, num;
8684 + u64 *used_pmds, *intr_pmds;
8685 +
8686 + num = set->nused_pmds;
8687 + used_pmds = set->used_pmds;
8688 + intr_pmds = ctx->regs.intr_pmds;
8689 +
8690 + for (i = 0; num; i++)
8691 + if (likely(test_bit(i, used_pmds))) {
8692 + if (likely(test_bit(i, intr_pmds)))
8693 + pfm_write_pmd(ctx, i, 0);
8694 + num--;
8695 + }
8696 +}
8697 +
8698 +/*
8699 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
8700 + * Context is locked. Interrupts are masked. Monitoring is active.
8701 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
8702 + *
8703 + * for per-thread:
8704 + * must stop monitoring for the task
8705 + * Return:
8706 + * non-zero : did not save PMDs (as part of stopping the PMU)
8707 + * 0 : saved PMDs (no need to save them in caller)
8708 + */
8709 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
8710 +{
8711 + struct pfm_arch_pmu_info *arch_info;
8712 +
8713 + arch_info = pfm_pmu_info();
8714 + /*
8715 + * disable lazy restore of the PMC/PMD registers.
8716 + */
8717 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
8718 +
8719 + if (ctx->state == PFM_CTX_MASKED)
8720 + return 1;
8721 +
8722 + pfm_stop_active(task, ctx, ctx->active_set);
8723 +
8724 + if (arch_info->ctxswout_thread)
8725 + arch_info->ctxswout_thread(task, ctx, ctx->active_set);
8726 +
8727 + return pfm_arch_is_active(ctx);
8728 +}
8729 +
8730 +/*
8731 + * Called from pfm_ctxsw
8732 + */
8733 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
8734 +{
8735 + struct pfm_arch_pmu_info *arch_info;
8736 +
8737 + arch_info = pfm_pmu_info();
8738 + if (ctx->state != PFM_CTX_MASKED && ctx->flags.started == 1) {
8739 + BUG_ON(!arch_info->enable_counters);
8740 + arch_info->enable_counters(ctx, ctx->active_set);
8741 + }
8742 +
8743 + if (arch_info->ctxswin_thread)
8744 + arch_info->ctxswin_thread(task, ctx, ctx->active_set);
8745 +}
8746 +
8747 +/*
8748 + * Called from pfm_stop() and idle notifier
8749 + *
8750 + * Interrupts are masked. Context is locked. Set is the active set.
8751 + *
8752 + * For per-thread:
8753 + * task is not necessarily current. If not current task, then
8754 + * task is guaranteed stopped and off any cpu. Access to PMU
8755 + * is not guaranteed. Interrupts are masked. Context is locked.
8756 + * Set is the active set.
8757 + *
8758 + * For system-wide:
8759 + * task is current
8760 + *
8761 + * must disable active monitoring. ctx cannot be NULL
8762 + */
8763 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
8764 +{
8765 + /*
8766 + * no need to go through stop_save()
8767 + * if we are already stopped
8768 + */
8769 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
8770 + return;
8771 +
8772 + /*
8773 + * stop live registers and collect pending overflow
8774 + */
8775 + if (task == current)
8776 + pfm_stop_active(task, ctx, ctx->active_set);
8777 +}
8778 +
8779 +/*
8780 + * Enable active monitoring. Called from pfm_start() and
8781 + * pfm_arch_unmask_monitoring().
8782 + *
8783 + * Interrupts are masked. Context is locked. Set is the active set.
8784 + *
8785 + * For per-thread:
8786 + * Task is not necessarily current. If not current task, then task
8787 + * is guaranteed stopped and off any cpu. No access to PMU if task
8788 + * is not current.
8789 + *
8790 + * For system-wide:
8791 + * Task is always current
8792 + */
8793 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
8794 +{
8795 + struct pfm_arch_pmu_info *arch_info;
8796 +
8797 + arch_info = pfm_pmu_info();
8798 + if (task != current)
8799 + return;
8800 +
8801 + BUG_ON(!arch_info->enable_counters);
8802 +
8803 + arch_info->enable_counters(ctx, ctx->active_set);
8804 +}
8805 +
8806 +/*
8807 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
8808 + * pfm_context_load_sys(), pfm_ctxsw().
8809 + * context is locked. Interrupts are masked. set cannot be NULL.
8810 + * Access to the PMU is guaranteed.
8811 + *
8812 + * function must restore all PMD registers from set.
8813 + */
8814 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
8815 +{
8816 + struct pfm_arch_pmu_info *arch_info;
8817 + u64 *used_pmds;
8818 + u16 i, num;
8819 +
8820 + arch_info = pfm_pmu_info();
8821 +
8822 + /* The model-specific module can override the default
8823 + * restore-PMD method.
8824 + */
8825 + if (arch_info->restore_pmds)
8826 + return arch_info->restore_pmds(ctx, set);
8827 +
8828 + num = set->nused_pmds;
8829 + used_pmds = set->used_pmds;
8830 +
8831 + for (i = 0; num; i++) {
8832 + if (likely(test_bit(i, used_pmds))) {
8833 + pfm_write_pmd(ctx, i, set->pmds[i].value);
8834 + num--;
8835 + }
8836 + }
8837 +}
8838 +
8839 +/*
8840 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
8841 + * pfm_context_load_sys(), pfm_ctxsw().
8842 + * context is locked. Interrupts are masked. set cannot be NULL.
8843 + * Access to the PMU is guaranteed.
8844 + *
8845 + * function must restore all PMC registers from set, if needed.
8846 + */
8847 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
8848 +{
8849 + struct pfm_arch_pmu_info *arch_info;
8850 + u64 *impl_pmcs;
8851 + unsigned int i, max_pmc, reg;
8852 +
8853 + arch_info = pfm_pmu_info();
8854 + /* The model-specific module can override the default
8855 + * restore-PMC method.
8856 + */
8857 + if (arch_info->restore_pmcs)
8858 + return arch_info->restore_pmcs(ctx, set);
8859 +
8860 + /* The "common" powerpc models enable the counters simply by writing
8861 + * all the control registers. Therefore, if we're masked or stopped we
8862 + * don't need to bother restoring the PMCs now.
8863 + */
8864 + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
8865 + return;
8866 +
8867 + max_pmc = ctx->regs.max_pmc;
8868 + impl_pmcs = ctx->regs.pmcs;
8869 +
8870 + /*
8871 + * Restore all pmcs in reverse order to ensure the counters aren't
8872 + * enabled before their event selectors are set correctly.
8873 + */
8874 + reg = max_pmc - 1;
8875 + for (i = 0; i < max_pmc; i++) {
8876 + if (test_bit(reg, impl_pmcs))
8877 + pfm_arch_write_pmc(ctx, reg, set->pmcs[reg]);
8878 + reg--;
8879 + }
8880 +}
8881 +
8882 +char *pfm_arch_get_pmu_module_name(void)
8883 +{
8884 + unsigned int pvr = mfspr(SPRN_PVR);
8885 +
8886 + switch (PVR_VER(pvr)) {
8887 + case 0x0004: /* 604 */
8888 + case 0x0009: /* 604e */
8889 + case 0x000A: /* 604ev */
8890 + case 0x0008: /* 750/740 */
8891 + case 0x7000: /* 750FX */
8892 + case 0x7001:
8893 + case 0x7002: /* 750GX */
8894 + case 0x000C: /* 7400 */
8895 + case 0x800C: /* 7410 */
8896 + case 0x8000: /* 7451/7441 */
8897 + case 0x8001: /* 7455/7445 */
8898 + case 0x8002: /* 7457/7447 */
8899 + case 0x8003: /* 7447A */
8900 + case 0x8004: /* 7448 */
8901 + return("perfmon_ppc32");
8902 + case PV_POWER4:
8903 + case PV_POWER4p:
8904 + return "perfmon_power4";
8905 + case PV_POWER5:
8906 + return "perfmon_power5";
8907 + case PV_POWER5p:
8908 + if (PVR_REV(pvr) < 0x300)
8909 + /* PMU behaves like POWER5 */
8910 + return "perfmon_power5";
8911 + else
8912 + /* PMU behaves like POWER6 */
8913 + return "perfmon_power6";
8914 + case PV_POWER6:
8915 + return "perfmon_power6";
8916 + case PV_970:
8917 + case PV_970FX:
8918 + case PV_970MP:
8919 + return "perfmon_ppc970";
8920 + case PV_BE:
8921 + return "perfmon_cell";
8922 + }
8923 + return NULL; /* PVR version not matched by any case above: no module name */
8924 +}
8925 +
8926 +void pfm_arch_init_percpu(void)
8927 +{
8928 +#ifdef CONFIG_PPC64
8929 + extern void ppc64_enable_pmcs(void);
8930 + ppc64_enable_pmcs();
8931 +#endif
8932 +}
8933 +
8934 +/**
8935 + * powerpc_irq_handler
8936 + *
8937 + * If interrupts are soft-disabled, record the PMU exception as pending and
8938 + * return; otherwise call the model-specific handler with this CPU's context.
8939 + **/
8940 +void powerpc_irq_handler(struct pt_regs *regs)
8941 +{
8942 + struct pfm_arch_pmu_info *arch_info;
8943 + struct pfm_context *ctx;
8944 +
8945 + if (! regs->softe) {
8946 + /*
8947 + * We got a PMU interrupt while interrupts were soft
8948 + * disabled. Disable hardware interrupts by clearing
8949 + * MSR_EE and also clear PMAO because we will need to set
8950 + * that again later when interrupts are re-enabled and
8951 + * raw_local_irq_restore() sees that the pmu_except_pending
8952 + * flag is set.
8953 + */
8954 + regs->msr &= ~MSR_EE;
8955 + get_paca()->pmu_except_pending = 1;
8956 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
8957 + return;
8958 + }
8959 +
8960 + arch_info = pfm_pmu_info();
8961 + if (arch_info->irq_handler) { /* the model-specific handler is optional */
8962 + ctx = __get_cpu_var(pmu_ctx);
8963 + if (likely(ctx)) /* nothing to do when no context is set for this CPU */
8964 + arch_info->irq_handler(regs, ctx);
8965 + }
8966 +}
8967 diff --git a/arch/powerpc/perfmon/perfmon_cell.c b/arch/powerpc/perfmon/perfmon_cell.c
8968 new file mode 100644
8969 index 0000000..e1ae12c
8970 --- /dev/null
8971 +++ b/arch/powerpc/perfmon/perfmon_cell.c
8972 @@ -0,0 +1,1449 @@
8973 +/*
8974 + * This file contains the Cell PMU register description tables
8975 + * and pmc checker used by perfmon.c.
8976 + *
8977 + * Copyright IBM Corporation 2007
8978 + * (C) Copyright 2007 TOSHIBA CORPORATION
8979 + *
8980 + * Based on other Perfmon2 PMU modules.
8981 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
8982 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
8983 + *
8984 + * This program is free software; you can redistribute it and/or
8985 + * modify it under the terms of version 2 of the GNU General Public
8986 + * License as published by the Free Software Foundation.
8987 + *
8988 + * This program is distributed in the hope that it will be useful,
8989 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8990 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8991 + * General Public License for more details.
8992 + *
8993 + * You should have received a copy of the GNU General Public License
8994 + * along with this program; if not, write to the Free Software
8995 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
8996 + * 02111-1307 USA
8997 + */
8998 +
8999 +#include <linux/module.h>
9000 +#include <linux/perfmon_kern.h>
9001 +#include <linux/io.h>
9002 +#include <asm/cell-pmu.h>
9003 +#include <asm/cell-regs.h>
9004 +#include <asm/machdep.h>
9005 +#include <asm/rtas.h>
9006 +#include <asm/ps3.h>
9007 +#include <asm/spu.h>
9008 +
9009 +MODULE_AUTHOR("Kevin Corry <kevcorry@us.ibm.com>, "
9010 + "Carl Love <carll@us.ibm.com>");
9011 +MODULE_DESCRIPTION("Cell PMU description table");
9012 +MODULE_LICENSE("GPL");
9013 +
9014 +struct pfm_cell_platform_pmu_info {
9015 + u32 (*read_ctr)(u32 cpu, u32 ctr);
9016 + void (*write_ctr)(u32 cpu, u32 ctr, u32 val);
9017 + void (*write_pm07_control)(u32 cpu, u32 ctr, u32 val);
9018 + void (*write_pm)(u32 cpu, enum pm_reg_name reg, u32 val);
9019 + void (*enable_pm)(u32 cpu);
9020 + void (*disable_pm)(u32 cpu);
9021 + void (*enable_pm_interrupts)(u32 cpu, u32 thread, u32 mask);
9022 + u32 (*get_and_clear_pm_interrupts)(u32 cpu);
9023 + u32 (*get_hw_thread_id)(int cpu);
9024 + struct cbe_ppe_priv_regs __iomem *(*get_cpu_ppe_priv_regs)(int cpu);
9025 + struct cbe_pmd_regs __iomem *(*get_cpu_pmd_regs)(int cpu);
9026 + struct cbe_mic_tm_regs __iomem *(*get_cpu_mic_tm_regs)(int cpu);
9027 + int (*rtas_token)(const char *service);
9028 + int (*rtas_call)(int token, int param1, int param2, int *param3, ...);
9029 +};
9030 +
9031 +/*
9032 + * Mapping from Perfmon logical control registers to Cell hardware registers.
9033 + */
9034 +static struct pfm_regmap_desc pfm_cell_pmc_desc[] = {
9035 + /* Per-counter control registers. */
9036 + PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0),
9037 + PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0),
9038 + PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0),
9039 + PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0),
9040 + PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0),
9041 + PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0),
9042 + PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0),
9043 + PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0),
9044 +
9045 + /* Per-counter RTAS arguments. Each of these registers has three fields.
9046 + * bits 63-48: debug-bus word
9047 + * bits 47-32: sub-unit
9048 + * bits 31-0 : full signal number
9049 + * (MSB = 63, LSB = 0)
9050 + */
9051 + PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0),
9052 + PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0),
9053 + PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0),
9054 + PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0),
9055 + PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0),
9056 + PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0),
9057 + PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0),
9058 + PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0),
9059 +
9060 + /* Global control registers. Same order as enum pm_reg_name. */
9061 + PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0), /* entry 16: CELL_PMC_GROUP_CONTROL */
9062 + PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0),
9063 + PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0),
9064 + PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0),
9065 + PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0), /* entry 20: CELL_PMC_PM_STATUS */
9066 + /* Set the interrupt overflow bits for the four 32-bit counters
9067 + * that are currently supported. Will need to fix when 32 and 16
9068 + * bit counters are supported.
9069 + */
9070 + PMC_D(PFM_REG_I, "pm_control", 0xF0000000, 0xF0000000, 0, 0), /* entry 21: CELL_PMC_PM_CONTROL */
9071 + PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* FIX: Does user-space also need read access to this one? */
9072 + PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0),
9073 +};
9074 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc)
9075 +
9076 +#define CELL_PMC_GROUP_CONTROL 16
9077 +#define CELL_PMC_PM_STATUS 20
9078 +#define CELL_PMC_PM_CONTROL 21
9079 +#define CELL_PMC_PM_CONTROL_CNTR_MASK 0x01E00000UL
9080 +#define CELL_PMC_PM_CONTROL_CNTR_16 0x01E00000UL
9081 +
9082 +/*
9083 + * Mapping from Perfmon logical data counters to Cell hardware counters.
9084 + */
9085 +static struct pfm_regmap_desc pfm_cell_pmd_desc[] = {
9086 + PMD_D(PFM_REG_C, "pm0", 0),
9087 + PMD_D(PFM_REG_C, "pm1", 0),
9088 + PMD_D(PFM_REG_C, "pm2", 0),
9089 + PMD_D(PFM_REG_C, "pm3", 0),
9090 + PMD_D(PFM_REG_C, "pm4", 0),
9091 + PMD_D(PFM_REG_C, "pm5", 0),
9092 + PMD_D(PFM_REG_C, "pm6", 0),
9093 + PMD_D(PFM_REG_C, "pm7", 0),
9094 +};
9095 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc)
9096 +
9097 +#define PFM_EVENT_PMC_BUS_WORD(x) (((x) >> 48) & 0x00ff)
9098 +#define PFM_EVENT_PMC_FULL_SIGNAL_NUMBER(x) ((x) & 0xffffffff)
9099 +#define PFM_EVENT_PMC_SIGNAL_GROUP(x) (((x) & 0xffffffff) / 100)
9100 +#define PFM_PM_CTR_INPUT_MUX_BIT(pm07_control) (((pm07_control) >> 26) & 0x1f)
9101 +#define PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(pm07_control) ((pm07_control) >> 31)
9102 +#define PFM_GROUP_CONTROL_GROUP0_WORD(grp_ctrl) ((grp_ctrl) >> 30)
9103 +#define PFM_GROUP_CONTROL_GROUP1_WORD(grp_ctrl) (((grp_ctrl) >> 28) & 0x3)
9104 +#define PFM_NUM_OF_GROUPS 2
9105 +#define PFM_PPU_IU1_THREAD1_BASE_BIT 19
9106 +#define PFM_PPU_XU_THREAD1_BASE_BIT 16
9107 +#define PFM_COUNTER_CTRL_PMC_PPU_TH0 0x100000000ULL
9108 +#define PFM_COUNTER_CTRL_PMC_PPU_TH1 0x200000000ULL
9109 +
9110 +/*
9111 + * Debug-bus signal handling.
9112 + *
9113 + * Some Cell systems have firmware that can handle the debug-bus signal
9114 + * routing. For systems without this firmware, we have a minimal in-kernel
9115 + * implementation as well.
9116 + */
9117 +
9118 +/* The firmware only sees physical CPUs, so divide by 2 if SMT is on. */
9119 +#ifdef CONFIG_SCHED_SMT
9120 +#define RTAS_CPU(cpu) ((cpu) / 2)
9121 +#else
9122 +#define RTAS_CPU(cpu) (cpu)
9123 +#endif
9124 +#define RTAS_BUS_WORD(x) (u16)(((x) >> 48) & 0x0000ffff)
9125 +#define RTAS_SUB_UNIT(x) (u16)(((x) >> 32) & 0x0000ffff)
9126 +#define RTAS_SIGNAL_NUMBER(x) (s32)( (x) & 0xffffffff)
9127 +#define RTAS_SIGNAL_GROUP(x) (RTAS_SIGNAL_NUMBER(x) / 100)
9128 +
9129 +#define subfunc_RESET 1
9130 +#define subfunc_ACTIVATE 2
9131 +
9132 +#define passthru_ENABLE 1
9133 +#define passthru_DISABLE 2
9134 +
9135 +/**
9136 + * struct cell_rtas_arg
9137 + *
9138 + * @cpu: Processor to modify. Linux numbers CPUs based on SMT IDs, but the
9139 + * firmware only sees the physical CPUs. So with SMT this value is the
9140 + * SMT ID (from smp_processor_id() or get_cpu()) divided by 2; see RTAS_CPU().
9141 + * @sub_unit: Hardware subunit this applies to (if applicable).
9142 + * @signal_group: Signal group to enable/disable on the trace bus.
9143 + * @bus_word: For signal groups that propagate via the trace bus, this trace
9144 + * bus word will be used. This is a mask of (1 << TraceBusWord).
9145 + * For other signal groups, this specifies the trigger or event bus.
9146 + * @bit: Trigger/Event bit, if applicable for the signal group.
9147 + *
9148 + * An array of these structures is passed to rtas_call() to set up the
9149 + * signals on the debug bus.
9150 + **/
9151 +struct cell_rtas_arg {
9152 + u16 cpu;
9153 + u16 sub_unit;
9154 + s16 signal_group;
9155 + u8 bus_word;
9156 + u8 bit;
9157 +};
9158 +
9159 +/**
9160 + * rtas_reset_signals
9161 + *
9162 + * Use the firmware RTAS call to disable signal pass-thru and to reset the
9163 + * debug-bus signals.
9164 + **/
9165 +static int rtas_reset_signals(u32 cpu)
9166 +{
9167 + struct cell_rtas_arg signal;
9168 + u64 real_addr = virt_to_phys(&signal);
9169 + int rc;
9170 + struct pfm_cell_platform_pmu_info *info =
9171 + ((struct pfm_arch_pmu_info *)
9172 + (pfm_pmu_conf->pmu_info))->platform_info;
9173 +
9174 + memset(&signal, 0, sizeof(signal));
9175 + signal.cpu = RTAS_CPU(cpu);
9176 + rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"),
9177 + 5, 1, NULL,
9178 + subfunc_RESET,
9179 + passthru_DISABLE,
9180 + real_addr >> 32,
9181 + real_addr & 0xffffffff,
9182 + sizeof(signal));
9183 +
9184 + return rc;
9185 +}
9186 +
9187 +/**
9188 + * rtas_activate_signals
9189 + *
9190 + * Use the firmware RTAS call to enable signal pass-thru and to activate the
9191 + * desired signal groups on the debug-bus.
9192 + **/
9193 +static int rtas_activate_signals(struct cell_rtas_arg *signals,
9194 + int num_signals)
9195 +{
9196 + u64 real_addr = virt_to_phys(signals);
9197 + int rc;
9198 + struct pfm_cell_platform_pmu_info *info =
9199 + ((struct pfm_arch_pmu_info *)
9200 + (pfm_pmu_conf->pmu_info))->platform_info;
9201 +
9202 + rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"),
9203 + 5, 1, NULL,
9204 + subfunc_ACTIVATE,
9205 + passthru_ENABLE,
9206 + real_addr >> 32,
9207 + real_addr & 0xffffffff,
9208 + num_signals * sizeof(*signals));
9209 +
9210 + return rc;
9211 +}
9212 +
9213 +#define HID1_RESET_MASK (~0x00000001ffffffffUL)
9214 +#define PPU_IU1_WORD0_HID1_EN_MASK (~0x00000001f0c0802cUL)
9215 +#define PPU_IU1_WORD0_HID1_EN_WORD ( 0x00000001f0400000UL)
9216 +#define PPU_IU1_WORD1_HID1_EN_MASK (~0x000000010fc08023UL)
9217 +#define PPU_IU1_WORD1_HID1_EN_WORD ( 0x000000010f400001UL)
9218 +#define PPU_XU_WORD0_HID1_EN_MASK (~0x00000001f038402cUL)
9219 +#define PPU_XU_WORD0_HID1_EN_WORD ( 0x00000001f0080008UL)
9220 +#define PPU_XU_WORD1_HID1_EN_MASK (~0x000000010f074023UL)
9221 +#define PPU_XU_WORD1_HID1_EN_WORD ( 0x000000010f030002UL)
9222 +
9223 +/* The bus_word field in the cell_rtas_arg structure is a bit-mask
9224 + * indicating which debug-bus word(s) to use.
9225 + */
9226 +enum {
9227 + BUS_WORD_0 = 1,
9228 + BUS_WORD_1 = 2,
9229 + BUS_WORD_2 = 4,
9230 + BUS_WORD_3 = 8,
9231 +};
9232 +
9233 +/* Definitions of the signal-groups known to the built-in signal-activation
9234 + * code. Only SIG_GROUP_PPU_IU1 and SIG_GROUP_PPU_XU are implemented by it.
9235 + */
9236 +enum {
9237 + SIG_GROUP_NONE = 0,
9238 +
9239 + /* 2.x PowerPC Processor Unit (PPU) Signal Groups */
9240 + SIG_GROUP_PPU_BASE = 20,
9241 + SIG_GROUP_PPU_IU1 = 21,
9242 + SIG_GROUP_PPU_XU = 22,
9243 +
9244 + /* 3.x PowerPC Storage Subsystem (PPSS) Signal Groups */
9245 + SIG_GROUP_PPSS_BASE = 30,
9246 +
9247 + /* 4.x Synergistic Processor Unit (SPU) Signal Groups */
9248 + SIG_GROUP_SPU_BASE = 40,
9249 +
9250 + /* 5.x Memory Flow Controller (MFC) Signal Groups */
9251 + SIG_GROUP_MFC_BASE = 50,
9252 +
9253 + /* 6.x Element Interconnect Bus (EIB) Signal Groups */
9254 + SIG_GROUP_EIB_BASE = 60,
9255 +
9256 + /* 7.x Memory Interface Controller (MIC) Signal Groups */
9257 + SIG_GROUP_MIC_BASE = 70,
9258 +
9259 + /* 8.x Cell Broadband Engine Interface (BEI) Signal Groups */
9260 + SIG_GROUP_BEI_BASE = 80,
9261 +};
9262 +
9263 +/**
9264 + * rmw_spr
9265 + *
9266 + * Read-modify-write for a special-purpose-register.
9267 + **/
9268 +#define rmw_spr(spr_id, a_mask, o_mask) \
9269 + do { \
9270 + u64 value = mfspr(spr_id); \
9271 + value &= (u64)(a_mask); \
9272 + value |= (u64)(o_mask); \
9273 + mtspr((spr_id), value); \
9274 + } while (0)
9275 +
9276 +/**
9277 + * rmw_mmio_reg64
9278 + *
9279 + * Read-modify-write for a 64-bit MMIO register.
9280 + **/
9281 +#define rmw_mmio_reg64(mem, a_mask, o_mask) \
9282 + do { \
9283 + u64 value = in_be64(&(mem)); \
9284 + value &= (u64)(a_mask); \
9285 + value |= (u64)(o_mask); \
9286 + out_be64(&(mem), value); \
9287 + } while (0)
9288 +
9289 +/**
9290 + * rmwb_mmio_reg64
9291 + *
9292 + * Set or unset a specified bit within a 64-bit MMIO register.
9293 + **/
9294 +#define rmwb_mmio_reg64(mem, bit_num, set_bit) \
9295 + rmw_mmio_reg64((mem), ~(1UL << (63 - (bit_num))), \
9296 + ((set_bit) << (63 - (bit_num))))
9297 +
9298 +/**
9299 + * passthru
9300 + *
9301 + * Enable or disable passthru mode in all the Cell signal islands.
9302 + **/
9303 +static int passthru(u32 cpu, u64 enable)
9304 +{
9305 + struct cbe_ppe_priv_regs __iomem *ppe_priv_regs;
9306 + struct cbe_pmd_regs __iomem *pmd_regs;
9307 + struct cbe_mic_tm_regs __iomem *mic_tm_regs;
9308 + struct pfm_cell_platform_pmu_info *info =
9309 + ((struct pfm_arch_pmu_info *)
9310 + (pfm_pmu_conf->pmu_info))->platform_info;
9311 +
9312 + ppe_priv_regs = info->get_cpu_ppe_priv_regs(cpu);
9313 + pmd_regs = info->get_cpu_pmd_regs(cpu);
9314 + mic_tm_regs = info->get_cpu_mic_tm_regs(cpu);
9315 +
9316 + if (!ppe_priv_regs || !pmd_regs || !mic_tm_regs) {
9317 + PFM_ERR("Error getting Cell PPE, PMD, and MIC "
9318 + "register maps: 0x%p, 0x%p, 0x%p",
9319 + ppe_priv_regs, pmd_regs, mic_tm_regs);
9320 + return -EINVAL;
9321 + }
9322 +
9323 + rmwb_mmio_reg64(ppe_priv_regs->L2_debug1, 61, enable);
9324 + rmwb_mmio_reg64(ppe_priv_regs->ciu_dr1, 5, enable);
9325 + rmwb_mmio_reg64(pmd_regs->on_ramp_trace, 39, enable);
9326 + rmwb_mmio_reg64(mic_tm_regs->MBL_debug, 20, enable);
9327 +
9328 + return 0;
9329 +}
9330 +
9331 +#define passthru_enable(cpu) passthru(cpu, 1)
9332 +#define passthru_disable(cpu) passthru(cpu, 0)
9333 +
9334 +static inline void reset_signal_registers(u32 cpu)
9335 +{
9336 + rmw_spr(SPRN_HID1, HID1_RESET_MASK, 0);
9337 +}
9338 +
9339 +/**
9340 + * celleb_reset_signals
9341 + *
9342 + * Non-rtas version of resetting the debug-bus signals.
9343 + **/
9344 +static int celleb_reset_signals(u32 cpu)
9345 +{
9346 + int rc;
9347 + rc = passthru_disable(cpu);
9348 + if (!rc)
9349 + reset_signal_registers(cpu);
9350 + return rc;
9351 +}
9352 +
9353 +/**
9354 + * ppu_selection
9355 + *
9356 + * Write the HID1 register to connect the specified PPU signal-group to the
9357 + * debug-bus.
9358 + **/
9359 +static int ppu_selection(struct cell_rtas_arg *signal)
9360 +{
9361 + u64 hid1_enable_word = 0;
9362 + u64 hid1_enable_mask = 0;
9363 +
9364 + switch (signal->signal_group) {
9365 +
9366 + case SIG_GROUP_PPU_IU1: /* 2.1 PPU Instruction Unit - Group 1 */
9367 + switch (signal->bus_word) {
9368 + case BUS_WORD_0:
9369 + hid1_enable_mask = PPU_IU1_WORD0_HID1_EN_MASK;
9370 + hid1_enable_word = PPU_IU1_WORD0_HID1_EN_WORD;
9371 + break;
9372 + case BUS_WORD_1:
9373 + hid1_enable_mask = PPU_IU1_WORD1_HID1_EN_MASK;
9374 + hid1_enable_word = PPU_IU1_WORD1_HID1_EN_WORD;
9375 + break;
9376 + default:
9377 + PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.",
9378 + signal->bus_word, signal->signal_group);
9379 + return -EINVAL;
9380 + }
9381 + break;
9382 +
9383 + case SIG_GROUP_PPU_XU: /* 2.2 PPU Execution Unit */
9384 + switch (signal->bus_word) {
9385 + case BUS_WORD_0:
9386 + hid1_enable_mask = PPU_XU_WORD0_HID1_EN_MASK;
9387 + hid1_enable_word = PPU_XU_WORD0_HID1_EN_WORD;
9388 + break;
9389 + case BUS_WORD_1:
9390 + hid1_enable_mask = PPU_XU_WORD1_HID1_EN_MASK;
9391 + hid1_enable_word = PPU_XU_WORD1_HID1_EN_WORD;
9392 + break;
9393 + default:
9394 + PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.",
9395 + signal->bus_word, signal->signal_group);
9396 + return -EINVAL;
9397 + }
9398 + break;
9399 +
9400 + default:
9401 + PFM_ERR("Signal-group %d not implemented.",
9402 + signal->signal_group);
9403 + return -EINVAL;
9404 + }
9405 +
9406 + rmw_spr(SPRN_HID1, hid1_enable_mask, hid1_enable_word);
9407 +
9408 + return 0;
9409 +}
9410 +
9411 +/**
9412 + * celleb_activate_signals
9413 + *
9414 + * Non-rtas version of activating the debug-bus signals.
9415 + **/
9416 +static int celleb_activate_signals(struct cell_rtas_arg *signals,
9417 + int num_signals)
9418 +{
9419 + int i, rc = -EINVAL;
9420 +
9421 + for (i = 0; i < num_signals; i++) {
9422 + switch (signals[i].signal_group) {
9423 +
9424 + /* 2.x PowerPC Processor Unit (PPU) Signal Selection */
9425 + case SIG_GROUP_PPU_IU1:
9426 + case SIG_GROUP_PPU_XU:
9427 + rc = ppu_selection(signals + i);
9428 + if (rc)
9429 + return rc;
9430 + break;
9431 +
9432 + default:
9433 + PFM_ERR("Signal-group %d not implemented.",
9434 + signals[i].signal_group);
9435 + return -EINVAL;
9436 + }
9437 + }
9438 +
9439 + if (0 < i)
9440 + rc = passthru_enable(signals[0].cpu);
9441 +
9442 + return rc;
9443 +}
9444 +
9445 +/**
9446 + * ps3_reset_signals
9447 + *
9448 + * ps3 version of resetting the debug-bus signals.
9449 + **/
9450 +static int ps3_reset_signals(u32 cpu)
9451 +{
9452 +#ifdef CONFIG_PPC_PS3
9453 + return ps3_set_signal(0, 0, 0, 0);
9454 +#else
9455 + return 0;
9456 +#endif
9457 +}
9458 +
9459 +/**
9460 + * ps3_activate_signals
9461 + *
9462 + * PS3 version of activating the debug-bus signals. Always returns 0.
9463 + **/
9464 +static int ps3_activate_signals(struct cell_rtas_arg *signals,
9465 + int num_signals)
9466 +{
9467 +#ifdef CONFIG_PPC_PS3
9468 + int i;
9469 +
9470 + for (i = 0; i < num_signals; i++)
9471 + ps3_set_signal(signals[i].signal_group, signals[i].bit,
9472 + signals[i].sub_unit, signals[i].bus_word); /* result is ignored */
9473 +#endif
9474 + return 0;
9475 +}
9476 +
9477 +
9478 +/**
9479 + * reset_signals
9480 + *
9481 + * Call to the firmware (if available) to reset the debug-bus signals.
9482 + * Otherwise call the built-in version.
9483 + **/
9484 +int reset_signals(u32 cpu)
9485 +{
9486 + int rc;
9487 +
9488 + if (machine_is(celleb))
9489 + rc = celleb_reset_signals(cpu);
9490 + else if (machine_is(ps3))
9491 + rc = ps3_reset_signals(cpu);
9492 + else
9493 + rc = rtas_reset_signals(cpu);
9494 +
9495 + return rc;
9496 +}
9497 +
9498 +/**
9499 + * activate_signals
9500 + *
9501 + * Call to the firmware (if available) to activate the debug-bus signals.
9502 + * Otherwise call the built-in version.
9503 + **/
9504 +int activate_signals(struct cell_rtas_arg *signals, int num_signals)
9505 +{
9506 + int rc;
9507 +
9508 + if (machine_is(celleb))
9509 + rc = celleb_activate_signals(signals, num_signals);
9510 + else if (machine_is(ps3))
9511 + rc = ps3_activate_signals(signals, num_signals);
9512 + else
9513 + rc = rtas_activate_signals(signals, num_signals);
9514 +
9515 + return rc;
9516 +}
9517 +
9518 +/**
9519 + * pfm_cell_pmc_check - validate a write to one of the pmX_event PMCs
9520 + *
9521 + * Returns 0 if valid, -EINVAL for a bad register/group/bus word, -EBUSY on a group clash.
9522 + **/
9523 +int pfm_cell_pmc_check(struct pfm_context *ctx,
9524 + struct pfm_event_set *set,
9525 + struct pfarg_pmc *req)
9526 +{
9527 + u16 cnum, reg_num = req->reg_num;
9528 + s32 signal_group = RTAS_SIGNAL_GROUP(req->reg_value); /* keep full s32 range: no aliasing onto a valid group */
9529 + u16 bus_word = RTAS_BUS_WORD(req->reg_value); /* keep all 16 bits: 0x0100 must not pass as 0 */
9530 +
9531 + if (reg_num < NR_CTRS || reg_num >= (NR_CTRS * 2))
9532 + return -EINVAL; /* only registers NR_CTRS .. 2*NR_CTRS-1 are accepted */
9533 +
9534 + switch (signal_group) {
9535 + case SIG_GROUP_PPU_IU1:
9536 + case SIG_GROUP_PPU_XU:
9537 + if ((bus_word != 0) && (bus_word != 1)) {
9538 + PFM_ERR("Invalid bus word (%d) for signal-group %d",
9539 + bus_word, signal_group);
9540 + return -EINVAL;
9541 + }
9542 + break;
9543 + default:
9544 + PFM_ERR("Signal-group %d not implemented.", signal_group);
9545 + return -EINVAL;
9546 + }
9547 +
9548 + for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) {
9549 + if (test_bit(cnum, cast_ulp(set->used_pmcs)) &&
9550 + bus_word == RTAS_BUS_WORD(set->pmcs[cnum]) &&
9551 + signal_group != RTAS_SIGNAL_GROUP(set->pmcs[cnum])) {
9552 + PFM_ERR("Impossible signal-group combination: "
9553 + "(%u,%u,%d) (%u,%u,%d)",
9554 + reg_num, bus_word, signal_group, cnum,
9555 + RTAS_BUS_WORD(set->pmcs[cnum]),
9556 + RTAS_SIGNAL_GROUP(set->pmcs[cnum]));
9557 + return -EBUSY; /* an in-use event PMC has the same bus word but another group */
9558 + }
9559 + }
9560 +
9561 + return 0;
9562 +}
9563 +
9564 +/**
9565 + * write_pm07_event
9566 + *
9567 + * Pull the signal arguments out of the 64-bit register value and call
9568 + * activate_signals(). @ctr is only used in the warning message.
9569 + **/
9570 +static void write_pm07_event(int cpu, unsigned int ctr, u64 value)
9571 +{
9572 + struct cell_rtas_arg signal;
9573 + s32 signal_number;
9574 + int rc;
9575 +
9576 + signal_number = RTAS_SIGNAL_NUMBER(value);
9577 + if (!signal_number) {
9578 + /* Don't include counters that are counting cycles. */
9579 + return;
9580 + }
9581 +
9582 + signal.cpu = RTAS_CPU(cpu);
9583 + signal.bus_word = 1 << RTAS_BUS_WORD(value); /* word index -> bit-mask; the field is a u8 */
9584 + signal.sub_unit = RTAS_SUB_UNIT(value);
9585 + signal.signal_group = signal_number / 100; /* group = signal number / 100 */
9586 + signal.bit = abs(signal_number) % 100; /* bit = last two decimal digits */
9587 +
9588 + rc = activate_signals(&signal, 1);
9589 + if (rc) {
9590 + PFM_WARN("%s(%d, %u, %lu): Error calling "
9591 + "activate_signals(): %d\n", __func__,
9592 + cpu, ctr, (unsigned long)value, rc);
9593 + /* FIX: Could we change this routine to return an error? */
9594 + }
9595 +}
9596 +
9597 +/**
9598 + * pfm_cell_probe_pmu
9599 + *
9600 + * Simply check the processor version register to see if we're currently
9601 + * on a Cell system.
9602 + **/
9603 +static int pfm_cell_probe_pmu(void)
9604 +{
9605 + unsigned long pvr = mfspr(SPRN_PVR);
9606 +
9607 + if (PVR_VER(pvr) != PV_BE)
9608 + return -1;
9609 +
9610 + return 0;
9611 +}
9612 +
9613 +/**
9614 + * pfm_cell_write_pmc - route a logical PMC write to the matching platform hook
9615 + **/
9616 +static void pfm_cell_write_pmc(unsigned int cnum, u64 value)
9617 +{
9618 + int cpu = smp_processor_id();
9619 + struct pfm_cell_platform_pmu_info *info =
9620 + ((struct pfm_arch_pmu_info *)
9621 + (pfm_pmu_conf->pmu_info))->platform_info;
9622 +
9623 + if (cnum < NR_CTRS) {
9624 + info->write_pm07_control(cpu, cnum, value);
9625 +
9626 + } else if (cnum < NR_CTRS * 2) {
9627 + write_pm07_event(cpu, cnum - NR_CTRS, value);
9628 +
9629 + } else if (cnum == CELL_PMC_PM_STATUS) {
9630 + /* The pm_status register must be treated separately from
9631 + * the other "global" PMCs. This call will ensure that
9632 + * the interrupts are routed to the correct CPU, as well
9633 + * as writing the desired value to the pm_status register.
9634 + */
9635 + info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu),
9636 + value);
9637 +
9638 + } else if (cnum < PFM_PM_NUM_PMCS) {
9639 + info->write_pm(cpu, cnum - (NR_CTRS * 2), value); /* offset is passed as the enum pm_reg_name argument */
9640 + } /* cnum >= PFM_PM_NUM_PMCS is silently ignored */
9641 +}
9642 +
9643 +/**
9644 + * pfm_cell_write_pmd - write counter @cnum on the current CPU; cnum >= NR_CTRS is ignored
9645 + **/
9646 +static void pfm_cell_write_pmd(unsigned int cnum, u64 value)
9647 +{
9648 + int cpu = smp_processor_id();
9649 + struct pfm_cell_platform_pmu_info *info =
9650 + ((struct pfm_arch_pmu_info *)
9651 + (pfm_pmu_conf->pmu_info))->platform_info;
9652 +
9653 + if (cnum < NR_CTRS)
9654 + info->write_ctr(cpu, cnum, value); /* write_ctr() takes a u32 value: the u64 is narrowed */
9655 +}
9656 +
9657 +/**
9658 + * pfm_cell_read_pmd - read counter @cnum on the current CPU via the read_ctr() hook
9659 + **/
9660 +static u64 pfm_cell_read_pmd(unsigned int cnum)
9661 +{
9662 + int cpu = smp_processor_id();
9663 + struct pfm_cell_platform_pmu_info *info =
9664 + ((struct pfm_arch_pmu_info *)
9665 + (pfm_pmu_conf->pmu_info))->platform_info;
9666 +
9667 + if (cnum < NR_CTRS)
9668 + return info->read_ctr(cpu, cnum);
9669 +
9670 + return -EINVAL; /* NOTE: returned through a u64, so callers see a large unsigned value */
9671 +}
9672 +
9673 +/**
9674 + * pfm_cell_enable_counters
9675 + *
9676 + * Enable the performance monitor on the current CPU via the enable_pm() hook.
9677 + **/
9678 +static void pfm_cell_enable_counters(struct pfm_context *ctx,
9679 + struct pfm_event_set *set)
9680 +{
9681 + struct pfm_cell_platform_pmu_info *info =
9682 + ((struct pfm_arch_pmu_info *)
9683 + (pfm_pmu_conf->pmu_info))->platform_info;
9684 +
9685 + info->enable_pm(smp_processor_id()); /* ctx and set are not used here */
9686 +}
9687 +
9688 +/**
9689 + * pfm_cell_disable_counters
9690 + *
9691 + * Disable the performance monitor on the current CPU via the disable_pm() hook.
9692 + **/
9693 +static void pfm_cell_disable_counters(struct pfm_context *ctx,
9694 + struct pfm_event_set *set)
9695 +{
9696 + struct pfm_cell_platform_pmu_info *info =
9697 + ((struct pfm_arch_pmu_info *)
9698 + (pfm_pmu_conf->pmu_info))->platform_info;
9699 +
9700 + info->disable_pm(smp_processor_id());
9701 + if (machine_is(ps3)) /* on PS3 the debug-bus signals are reset as well */
9702 + reset_signals(smp_processor_id());
9703 +}
9704 +
9705 +/*
9706 + * Return the thread id of the specified ppu signal.
9707 + */
9708 +static inline u32 get_target_ppu_thread_id(u32 group, u32 bit)
9709 +{
9710 + if ((group == SIG_GROUP_PPU_IU1 &&
9711 + bit < PFM_PPU_IU1_THREAD1_BASE_BIT) ||
9712 + (group == SIG_GROUP_PPU_XU &&
9713 + bit < PFM_PPU_XU_THREAD1_BASE_BIT))
9714 + return 0;
9715 + else
9716 + return 1;
9717 +}
9718 +
9719 +/*
9720 + * Return whether the specified counter is for PPU signal group.
9721 + */
9722 +static inline int is_counter_for_ppu_sig_grp(u32 counter_control, u32 sig_grp)
9723 +{
9724 + if (!(counter_control & CBE_PM_CTR_INPUT_CONTROL) &&
9725 + (counter_control & CBE_PM_CTR_ENABLE) &&
9726 + ((sig_grp == SIG_GROUP_PPU_IU1) || (sig_grp == SIG_GROUP_PPU_XU)))
9727 + return 1;
9728 + else
9729 + return 0;
9730 +}
9731 +
9732 +/*
9733 + * Search ppu signal groups.
9734 + */
9735 +static int get_ppu_signal_groups(struct pfm_event_set *set,
9736 + u32 *ppu_sig_grp0, u32 *ppu_sig_grp1)
9737 +{
9738 + u64 pm_event, *used_pmcs = set->used_pmcs;
9739 + int i, j;
9740 + u32 grp0_wd, grp1_wd, wd, sig_grp;
9741 +
9742 + *ppu_sig_grp0 = 0;
9743 + *ppu_sig_grp1 = 0;
9744 + grp0_wd = PFM_GROUP_CONTROL_GROUP0_WORD(
9745 + set->pmcs[CELL_PMC_GROUP_CONTROL]);
9746 + grp1_wd = PFM_GROUP_CONTROL_GROUP1_WORD(
9747 + set->pmcs[CELL_PMC_GROUP_CONTROL]);
9748 +
9749 + for (i = 0, j = 0; (i < NR_CTRS) && (j < PFM_NUM_OF_GROUPS); i++) {
9750 + if (test_bit(i + NR_CTRS, used_pmcs)) {
9751 + pm_event = set->pmcs[i + NR_CTRS];
9752 + wd = PFM_EVENT_PMC_BUS_WORD(pm_event);
9753 + sig_grp = PFM_EVENT_PMC_SIGNAL_GROUP(pm_event);
9754 + if ((sig_grp == SIG_GROUP_PPU_IU1) ||
9755 + (sig_grp == SIG_GROUP_PPU_XU)) {
9756 +
9757 + if (wd == grp0_wd && *ppu_sig_grp0 == 0) {
9758 + *ppu_sig_grp0 = sig_grp;
9759 + j++;
9760 + } else if (wd == grp1_wd &&
9761 + *ppu_sig_grp1 == 0) {
9762 + *ppu_sig_grp1 = sig_grp;
9763 + j++;
9764 + }
9765 + }
9766 + }
9767 + }
9768 + return j;
9769 +}
9770 +
9771 +/**
9772 + * pfm_cell_restore_pmcs
9773 + *
9774 + * Write all control register values that are saved in the specified event
9775 + * set: the per-counter control registers first, each followed by its event
9776 + * (signal) register if that PMC is in use, then the global registers.
9777 + * Each in-use event register results in its own activate_signals() call
9778 + * through write_pm07_event().
9779 + *
9780 + * In per-thread mode, the counter enable bit of a pmX_control PMC is only
9781 + * written as set while running on the HW thread the counter targets
9782 + * (PFM_COUNTER_CTRL_PMC_PPU_TH0 / _TH1); otherwise it is cleared.
9783 + **/
9784 +void pfm_cell_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
9785 +{
9786 + u64 ctr_ctrl;
9787 + u64 *used_pmcs = set->used_pmcs;
9788 + int i;
9789 + int cpu = smp_processor_id();
9790 + u32 current_th_id;
9791 + struct pfm_cell_platform_pmu_info *info =
9792 + ((struct pfm_arch_pmu_info *)
9793 + (pfm_pmu_conf->pmu_info))->platform_info;
9794 +
9795 + for (i = 0; i < NR_CTRS; i++) {
9796 + ctr_ctrl = set->pmcs[i];
9797 +
9798 + if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH0) {
9799 + current_th_id = info->get_hw_thread_id(cpu);
9800 +
9801 + /*
9802 + * Clear the counter enable bit if the current
9803 + * HW thread is NOT 0
9804 + */
9805 + if (current_th_id)
9806 + ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE;
9807 +
9808 + } else if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH1) {
9809 + current_th_id = info->get_hw_thread_id(cpu);
9810 +
9811 + /*
9812 + * Clear the counter enable bit if the current
9813 + * HW thread is 0
9814 + */
9815 + if (!current_th_id)
9816 + ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE;
9817 + }
9818 +
9819 + /* Write the per-counter control register. If the PMC is not
9820 + * in use, then it will simply clear the register, which will
9821 + * disable the associated counter.
9822 + */
9823 + info->write_pm07_control(cpu, i, ctr_ctrl);
9824 +
9825 + if (test_bit(i + NR_CTRS, used_pmcs)) /* pass the real counter so warnings name it */
9826 + write_pm07_event(cpu, i, set->pmcs[i + NR_CTRS]);
9827 + }
9828 +
9829 + /* Write all the global PMCs. Need to call pfm_cell_write_pmc()
9830 + * instead of cbe_write_pm() due to special handling for the
9831 + * pm_status register.
9832 + */
9833 + for (i *= 2; i < PFM_PM_NUM_PMCS; i++) /* i == NR_CTRS here, so start at 2 * NR_CTRS */
9834 + pfm_cell_write_pmc(i, set->pmcs[i]);
9835 +}
9836 +
9837 +/**
9838 + * pfm_cell_restore_pmds
9839 + *
9840 + * Write to the pm_control register before writing to the counter registers
9841 + * so that the counter width is decided before writing to the counters.
9842 + **/
9843 +void pfm_cell_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
9844 +{
9845 + u64 *used_pmds;
9846 + unsigned int i, max_pmd;
9847 + int cpu = smp_processor_id();
9848 + struct pfm_cell_platform_pmu_info *info =
9849 + ((struct pfm_arch_pmu_info *)
9850 + (pfm_pmu_conf->pmu_info))->platform_info;
9851 +
9852 + /*
9853 + * Write pm_control register value with CBE_PM_ENABLE_PERF_MON cleared
9854 + */
9855 + info->write_pm(cpu, pm_control,
9856 + set->pmcs[CELL_PMC_PM_CONTROL] &
9857 + ~CBE_PM_ENABLE_PERF_MON);
9858 + PFM_DBG("restore pm_control(0x%lx) before restoring pmds",
9859 + set->pmcs[CELL_PMC_PM_CONTROL]); /* logs the saved value, not the masked one */
9860 +
9861 + max_pmd = ctx->regs.max_pmd;
9862 + used_pmds = set->used_pmds;
9863 +
9864 + for (i = 0; i < max_pmd; i++) /* only in-use, non-read-only PMDs are written */
9865 + if (test_bit(i, used_pmds) &&
9866 + !(pfm_pmu_conf->pmd_desc[i].type & PFM_REG_RO))
9867 + pfm_cell_write_pmd(i, set->pmds[i].value);
9868 +}
9869 +
9870 +/**
9871 + * pfm_cell_get_cntr_width
9872 + *
9873 + * This function checks the 16bit counter field in the pm_control pmc.
9874 + * If ctx->flags.switch_ovfl or switch_time is set, every set in ctx->set_list is checked.
9875 + * Return value
9876 + * 16 : all counters are 16bit width.
9877 + * 32 : all counters are 32bit width.
9878 + * 0 : counter widths are mixed (within one set or between sets).
9879 + **/
9880 +static int pfm_cell_get_cntr_width(struct pfm_context *ctx,
9881 + struct pfm_event_set *s)
9882 +{
9883 + int width = 0;
9884 + int tmp = 0;
9885 + u64 cntr_field;
9886 +
9887 + if (ctx->flags.switch_ovfl || ctx->flags.switch_time) {
9888 + list_for_each_entry(s, &ctx->set_list, list) { /* the parameter s is reused as the cursor */
9889 + cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] &
9890 + CELL_PMC_PM_CONTROL_CNTR_MASK;
9891 +
9892 + if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16)
9893 + tmp = 16;
9894 + else if (cntr_field == 0x0)
9895 + tmp = 32;
9896 + else
9897 + return 0;
9898 +
9899 + if (tmp != width && width != 0) /* this set disagrees with an earlier one */
9900 + return 0;
9901 +
9902 + width = tmp;
9903 + }
9904 + } else {
9905 + cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] &
9906 + CELL_PMC_PM_CONTROL_CNTR_MASK;
9907 +
9908 + if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16)
9909 + width = 16;
9910 + else if (cntr_field == 0x0)
9911 + width = 32;
9912 + else
9913 + width = 0;
9914 + }
9915 + return width;
9916 +}
9917 +
9918 +/**
9919 + * pfm_cell_check_cntr_ovfl
9920 + *
9921 + * Return value
9922 + * 1 : cntr_ovfl interrupt is used.
9923 + * 0 : cntr_ovfl interrupt is not used.
9924 + **/
9925 +static int pfm_cell_check_cntr_ovfl(struct pfm_context *ctx,
9926 + struct pfm_event_set *s)
9927 +{
9928 + if (ctx->flags.switch_ovfl || ctx->flags.switch_time) {
9929 + list_for_each_entry(s, &ctx->set_list, list) { /* set switching: check every set, not just @s */
9930 + if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS]))
9931 + return 1;
9932 + }
9933 + } else {
9934 + if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS]))
9935 + return 1;
9936 + }
9937 + return 0;
9938 +}
9939 +
9940 +#ifdef CONFIG_PPC_PS3
9941 +/**
9942 + * update_sub_unit_field
9943 + *
9944 + **/
9945 +static inline u64 update_sub_unit_field(u64 pm_event, u64 spe_id)
9946 +{
9947 + return ((pm_event & 0xFFFF0000FFFFFFFF) | (spe_id << 32)); /* clear bits 32-47, OR in spe_id << 32; spe_id is not masked - NOTE(review): presumably fits in 16 bits, confirm */
9948 +}
9949 +
9950 +/**
9951 + * pfm_get_spe_id
9952 + *
9953 + **/
9954 +static u64 pfm_get_spe_id(void *arg)
9955 +{
9956 + struct spu *spu = arg; /* @arg is treated as a struct spu pointer */
9957 + u64 spe_id;
9958 +
9959 + if (machine_is(ps3))
9960 + spe_id = ps3_get_spe_id(arg); /* PS3: id comes from the platform helper */
9961 + else
9962 + spe_id = spu->spe_id; /* otherwise: read it from the spu itself */
9963 +
9964 + return spe_id;
9965 +}
9966 +
9967 +/**
9968 + * pfm_spu_number_to_id
9969 + *
9970 + **/
9971 +static int pfm_spu_number_to_id(int number, u64 *spe_id)
9972 +{
9973 + struct spu *spu;
9974 + int i;
9975 +
9976 + for (i = 0; i < MAX_NUMNODES; i++) {
9977 + if (cbe_spu_info[i].n_spus == 0)
9978 + continue; /* no SPUs recorded for this node */
9979 +
9980 + list_for_each_entry(spu, &cbe_spu_info[i].spus, cbe_list)
9981 + if (spu->number == number) {
9982 + *spe_id = pfm_get_spe_id(spu); /* first match wins */
9983 + return 0;
9984 + }
9985 + }
9986 + return -ENODEV; /* no spu with this number on any node */
9987 +}
9988 +
9989 +/**
9990 + * pfm_update_pmX_event_subunit_field
9991 + *
9992 + * In system wide mode,
9993 + * This function updates the subunit field of SPE pmX_event.
9994 + **/
9995 +static int pfm_update_pmX_event_subunit_field(struct pfm_context *ctx)
9996 +{
9997 + struct pfm_event_set *set;
9998 + int i, last_pmc, ret;
9999 + u64 signal_group, spe_id;
10000 + int sub_unit;
10001 + u64 *used_pmcs;
10002 +
10003 + last_pmc = NR_CTRS + 8; /* scans pmcs[NR_CTRS .. NR_CTRS + 7] */
10004 + ret = 0;
10005 + list_for_each_entry(set, &ctx->set_list, list) {
10006 +
10007 + used_pmcs = set->used_pmcs;
10008 + for (i = NR_CTRS; i < last_pmc; i++) {
10009 + if (!test_bit(i, used_pmcs))
10010 + continue;
10011 +
10012 + signal_group = PFM_EVENT_PMC_SIGNAL_GROUP(set->pmcs[i]);
10013 +
10014 + /*
10015 + * If the target event is a SPE signal group event,
10016 + * the sub_unit field in the pmX_event pmc is changed to the
10017 + * specified spe_id.
10018 + */
10019 + if (SIG_GROUP_SPU_BASE < signal_group &&
10020 + signal_group < SIG_GROUP_EIB_BASE) {
10021 + sub_unit = RTAS_SUB_UNIT(set->pmcs[i]);
10022 +
10023 + ret = pfm_spu_number_to_id(sub_unit, &spe_id);
10024 + if (ret)
10025 + return ret; /* unknown spu number: abort; earlier pmcs stay rewritten */
10026 +
10027 + set->pmcs[i] = update_sub_unit_field(
10028 + set->pmcs[i], spe_id);
10029 + }
10030 + }
10031 + }
10032 + return 0;
10033 +}
10034 +#endif
10035 +
10036 +/**
10037 + * pfm_cell_load_context
10038 + *
10039 + * In per-thread mode,
10040 + * The pmX_control PMCs which are used for PPU IU/XU event are marked with
10041 + * the thread id(PFM_COUNTER_CTRL_PMC_PPU_TH0/TH1).
10042 + **/
10043 +static int pfm_cell_load_context(struct pfm_context *ctx)
10044 +{
10045 + int i;
10046 + u32 ppu_sig_grp[PFM_NUM_OF_GROUPS] = {SIG_GROUP_NONE, SIG_GROUP_NONE};
10047 + u32 bit;
10048 + int index;
10049 + u32 target_th_id;
10050 + int ppu_sig_num = 0;
10051 + struct pfm_event_set *s;
10052 + int cntr_width = 32;
10053 + int ret = 0;
10054 +
10055 + if (pfm_cell_check_cntr_ovfl(ctx, ctx->active_set)) {
10056 + cntr_width = pfm_cell_get_cntr_width(ctx, ctx->active_set);
10057 +
10058 + /*
10059 + * Counter overflow interrupt works with only 32bit counter,
10060 + * because perfmon core uses pfm_cell_pmu_conf.counter_width
10061 + * to deal with the counter overflow. We can't change the
10062 + * counter width here.
10063 + */
10064 + if (cntr_width != 32)
10065 + return -EINVAL;
10066 + }
10067 +
10068 + if (ctx->flags.system) { /* system-wide: no per-thread marking below */
10069 +#ifdef CONFIG_PPC_PS3
10070 + if (machine_is(ps3))
10071 + ret = pfm_update_pmX_event_subunit_field(ctx);
10072 +#endif
10073 + return ret;
10074 + }
10075 +
10076 + list_for_each_entry(s, &ctx->set_list, list) {
10077 + ppu_sig_num = get_ppu_signal_groups(s, &ppu_sig_grp[0],
10078 + &ppu_sig_grp[1]);
10079 +
10080 + for (i = 0; i < NR_CTRS; i++) {
10081 + index = PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(s->pmcs[i]); /* NOTE(review): assumed to be < PFM_NUM_OF_GROUPS - confirm */
10082 + if (ppu_sig_num &&
10083 + (ppu_sig_grp[index] != SIG_GROUP_NONE) &&
10084 + is_counter_for_ppu_sig_grp(s->pmcs[i],
10085 + ppu_sig_grp[index])) {
10086 +
10087 + bit = PFM_PM_CTR_INPUT_MUX_BIT(s->pmcs[i]);
10088 + target_th_id = get_target_ppu_thread_id(
10089 + ppu_sig_grp[index], bit);
10090 + if (!target_th_id) /* thread id 0 -> TH0 flag, anything else -> TH1 flag */
10091 + s->pmcs[i] |=
10092 + PFM_COUNTER_CTRL_PMC_PPU_TH0;
10093 + else
10094 + s->pmcs[i] |=
10095 + PFM_COUNTER_CTRL_PMC_PPU_TH1;
10096 + PFM_DBG("set:%d mark ctr:%d target_thread:%d",
10097 + s->id, i, target_th_id);
10098 + }
10099 + }
10100 + }
10101 +
10102 + return ret;
10103 +}
10104 +
10105 +/**
10106 + * pfm_cell_unload_context
10107 + *
10108 + * For system-wide contexts and self-monitored contexts, make the RTAS call
10109 + * to reset the debug-bus signals.
10110 + *
10111 + * For non-self-monitored contexts, the monitored thread will already have
10112 + * been taken off the CPU and we don't need to do anything additional.
10113 + **/
10114 +static void pfm_cell_unload_context(struct pfm_context *ctx)
10115 +{
10116 + if (ctx->task == current || ctx->flags.system) /* self-monitored or system-wide */
10117 + reset_signals(smp_processor_id());
10118 +}
10119 +
10120 +/**
10121 + * pfm_cell_ctxswout_thread
10122 + *
10123 + * When a monitored thread is switched out (self-monitored or externally
10124 + * monitored) we need to reset the debug-bus signals so the next context that
10125 + * gets switched in can start from a clean set of signals.
10126 + **/
10127 +int pfm_cell_ctxswout_thread(struct task_struct *task,
10128 + struct pfm_context *ctx, struct pfm_event_set *set)
10129 +{
10130 + reset_signals(smp_processor_id()); /* none of the three arguments is used */
10131 + return 0; /* always 0 */
10132 +}
10133 +
10134 +/**
10135 + * pfm_cell_get_ovfl_pmds
10136 + *
10137 + * Determine which counters in this set have overflowed and fill in the
10138 + * set->povfl_pmds mask and set->npend_ovfls count. On Cell, the pm_status
10139 + * register contains a bit for each counter to indicate overflow. However,
10140 + * those 8 bits are in the reverse order of what Perfmon2 is expecting,
10141 + * so we need to reverse the order of the overflow bits.
10142 + **/
10143 +static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx,
10144 + struct pfm_event_set *set)
10145 +{
10146 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
10147 + u32 pm_status, ovfl_ctrs;
10148 + u64 povfl_pmds = 0;
10149 + int i;
10150 + struct pfm_cell_platform_pmu_info *info =
10151 + ((struct pfm_arch_pmu_info *)
10152 + (pfm_pmu_conf->pmu_info))->platform_info;
10153 +
10154 + if (!ctx_arch->last_read_updated)
10155 + /* This routine was not called via the interrupt handler.
10156 + * Need to start by getting interrupts and updating
10157 + * last_read_pm_status.
10158 + */
10159 + ctx_arch->last_read_pm_status =
10160 + info->get_and_clear_pm_interrupts(smp_processor_id());
10161 +
10162 + /* Reset the flag that the interrupt handler last read pm_status. */
10163 + ctx_arch->last_read_updated = 0;
10164 +
10165 + pm_status = ctx_arch->last_read_pm_status &
10166 + set->pmcs[CELL_PMC_PM_STATUS]; /* keep only bits also set in the saved pm_status pmc */
10167 + ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status);
10168 +
10169 + /* Reverse the order of the bits in ovfl_ctrs
10170 + * and store the result in povfl_pmds.
10171 + */
10172 + for (i = 0; i < PFM_PM_NUM_PMDS; i++) {
10173 + povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1);
10174 + ovfl_ctrs >>= 1;
10175 + }
10176 +
10177 + /* Mask povfl_pmds with set->used_pmds to get set->povfl_pmds.
10178 + * Count the bits set in set->povfl_pmds to get set->npend_ovfls.
10179 + */
10180 + bitmap_and(set->povfl_pmds, &povfl_pmds,
10181 + set->used_pmds, PFM_PM_NUM_PMDS);
10182 + set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS);
10183 +}
10184 +
10185 +/**
10186 + * pfm_cell_acquire_pmu
10187 + *
10188 + * acquire PMU resource.
10189 + * This acquisition is done when the first context is created.
10190 + **/
10191 +int pfm_cell_acquire_pmu(u64 *unavail_pmcs, u64 *unavail_pmds)
10192 +{
10193 +#ifdef CONFIG_PPC_PS3
10194 + int ret;
10195 +
10196 + if (machine_is(ps3)) {
10197 + PFM_DBG("");
10198 + ret = ps3_lpm_open(PS3_LPM_TB_TYPE_INTERNAL, NULL, 0);
10199 + if (ret) {
10200 + PFM_ERR("Can't create PS3 lpm. error:%d", ret);
10201 + return -EFAULT; /* NOTE(review): the ps3_lpm_open() error is only logged, callers always see -EFAULT */
10202 + }
10203 + }
10204 +#endif
10205 + return 0; /* nothing to acquire when not on PS3; both arguments are unused */
10206 +}
10207 +
10208 +/**
10209 + * pfm_cell_release_pmu
10210 + *
10211 + * release PMU resource.
10212 + * actual release happens when last context is destroyed
10213 + **/
10214 +void pfm_cell_release_pmu(void)
10215 +{
10216 +#ifdef CONFIG_PPC_PS3
10217 + if (machine_is(ps3)) {
10218 + if (ps3_lpm_close()) /* a failure is logged but not propagated (void return) */
10219 + PFM_ERR("Can't delete PS3 lpm.");
10220 + }
10221 +#endif
10222 +}
10223 +
10224 +/**
10225 + * handle_trace_buffer_interrupts
10226 + *
10227 + * This routine is for processing just the interval timer and trace buffer
10228 + * overflow interrupts. Performance counter interrupts are handled by the
10229 + * perf_irq_handler() routine, which reads and saves the pm_status register.
10230 + * This routine should not read the actual pm_status register, but rather
10231 + * the value passed in.
10232 + **/
10233 +static void handle_trace_buffer_interrupts(unsigned long iip,
10234 + struct pt_regs *regs,
10235 + struct pfm_context *ctx,
10236 + u32 pm_status)
10237 +{
10238 + /* FIX: Currently ignoring trace-buffer interrupts (this is a no-op). */
10239 + return;
10240 +}
10241 +
10242 +/**
10243 + * pfm_cell_irq_handler
10244 + *
10245 + * Handler for all Cell performance-monitor interrupts.
10246 + **/
10247 +static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx)
10248 +{
10249 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
10250 + u32 last_read_pm_status;
10251 + int cpu = smp_processor_id();
10252 + struct pfm_cell_platform_pmu_info *info =
10253 + ((struct pfm_arch_pmu_info *)
10254 + (pfm_pmu_conf->pmu_info))->platform_info;
10255 +
10256 + /* Need to disable and reenable the performance counters to get the
10257 + * desired behavior from the hardware. This is specific to the Cell
10258 + * PMU hardware.
10259 + */
10260 + info->disable_pm(cpu);
10261 +
10262 + /* Read the pm_status register to get the interrupt bits. If a
10263 + * performance counter overflow interrupt occurred, call the core
10264 + * perfmon interrupt handler to service the counter overflow. If the
10265 + * interrupt was for the interval timer or the trace_buffer,
10266 + * call the interval timer and trace buffer interrupt handler.
10267 + *
10268 + * The value read from the pm_status register is stored in the
10269 + * pfm_arch_context structure for use by other routines. Note that
10270 + * reading the pm_status register resets the interrupt flags to zero.
10271 + * Hence, it is important that the register is only read in one place.
10272 + *
10273 + * The pm_status reg interrupt reg format is:
10274 + * [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:]
10275 + * - pmd0 to pmd7 are the perf counter overflow interrupts.
10276 + * - intt is the interval timer overflowed interrupt.
10277 + * - tbf is the trace buffer full interrupt.
10278 + * - tbu is the trace buffer underflow interrupt.
10279 + * - The pmd0 bit is the MSB of the 32 bit register.
10280 + */
10281 + ctx_arch->last_read_pm_status = last_read_pm_status =
10282 + info->get_and_clear_pm_interrupts(cpu);
10283 +
10284 + /* Set flag for pfm_cell_get_ovfl_pmds() routine so it knows
10285 + * last_read_pm_status was updated by the interrupt handler.
10286 + */
10287 + ctx_arch->last_read_updated = 1;
10288 +
10289 + if (last_read_pm_status & CBE_PM_ALL_OVERFLOW_INTR)
10290 + /* At least one counter overflowed. */
10291 + pfm_interrupt_handler(instruction_pointer(regs), regs);
10292 +
10293 + if (last_read_pm_status & (CBE_PM_INTERVAL_INTR |
10294 + CBE_PM_TRACE_BUFFER_FULL_INTR |
10295 + CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR))
10296 + /* Trace buffer or interval timer overflow. */
10297 + handle_trace_buffer_interrupts(instruction_pointer(regs),
10298 + regs, ctx, last_read_pm_status);
10299 +
10300 + /* The interrupt settings are the value written to the pm_status
10301 + * register. It is saved in the context when the register is
10302 + * written.
10303 + */
10304 + info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu),
10305 + ctx->active_set->pmcs[CELL_PMC_PM_STATUS]);
10306 +
10307 + /* The writes to the various performance counters only write to a
10308 + * latch. The new values (interrupt setting bits, reset counter value
10309 + * etc.) are not copied to the actual registers until the performance
10310 + * monitor is enabled. In order to get this to work as desired, the
10311 + * performance monitor needs to be disabled while writing to the
10312 + * latches. This is a HW design issue.
10313 + */
10314 + info->enable_pm(cpu);
10315 +}
10316 +
10317 +
10318 +static struct pfm_cell_platform_pmu_info ps3_platform_pmu_info = { /* PS3 accessor table; every member is left unset without CONFIG_PPC_PS3 */
10319 +#ifdef CONFIG_PPC_PS3
10320 + .read_ctr = ps3_read_ctr,
10321 + .write_ctr = ps3_write_ctr,
10322 + .write_pm07_control = ps3_write_pm07_control,
10323 + .write_pm = ps3_write_pm,
10324 + .enable_pm = ps3_enable_pm,
10325 + .disable_pm = ps3_disable_pm,
10326 + .enable_pm_interrupts = ps3_enable_pm_interrupts,
10327 + .get_and_clear_pm_interrupts = ps3_get_and_clear_pm_interrupts,
10328 + .get_hw_thread_id = ps3_get_hw_thread_id,
10329 + .get_cpu_ppe_priv_regs = NULL, /* the register-access and RTAS hooks are not provided on PS3 */
10330 + .get_cpu_pmd_regs = NULL,
10331 + .get_cpu_mic_tm_regs = NULL,
10332 + .rtas_token = NULL,
10333 + .rtas_call = NULL,
10334 +#endif
10335 +};
10336 +
10337 +static struct pfm_cell_platform_pmu_info native_platform_pmu_info = { /* non-PS3 accessor table; every member is left unset without CONFIG_PPC_CELL_NATIVE */
10338 +#ifdef CONFIG_PPC_CELL_NATIVE
10339 + .read_ctr = cbe_read_ctr,
10340 + .write_ctr = cbe_write_ctr,
10341 + .write_pm07_control = cbe_write_pm07_control,
10342 + .write_pm = cbe_write_pm,
10343 + .enable_pm = cbe_enable_pm,
10344 + .disable_pm = cbe_disable_pm,
10345 + .enable_pm_interrupts = cbe_enable_pm_interrupts,
10346 + .get_and_clear_pm_interrupts = cbe_get_and_clear_pm_interrupts,
10347 + .get_hw_thread_id = cbe_get_hw_thread_id,
10348 + .get_cpu_ppe_priv_regs = cbe_get_cpu_ppe_priv_regs,
10349 + .get_cpu_pmd_regs = cbe_get_cpu_pmd_regs,
10350 + .get_cpu_mic_tm_regs = cbe_get_cpu_mic_tm_regs,
10351 + .rtas_token = rtas_token,
10352 + .rtas_call = rtas_call,
10353 +#endif
10354 +};
10355 +
10356 +static struct pfm_arch_pmu_info pfm_cell_pmu_info = { /* Cell callbacks; .platform_info is filled in by pfm_cell_platform_probe() */
10357 + .pmu_style = PFM_POWERPC_PMU_CELL,
10358 + .acquire_pmu = pfm_cell_acquire_pmu,
10359 + .release_pmu = pfm_cell_release_pmu,
10360 + .write_pmc = pfm_cell_write_pmc,
10361 + .write_pmd = pfm_cell_write_pmd,
10362 + .read_pmd = pfm_cell_read_pmd,
10363 + .enable_counters = pfm_cell_enable_counters,
10364 + .disable_counters = pfm_cell_disable_counters,
10365 + .irq_handler = pfm_cell_irq_handler,
10366 + .get_ovfl_pmds = pfm_cell_get_ovfl_pmds,
10367 + .restore_pmcs = pfm_cell_restore_pmcs,
10368 + .restore_pmds = pfm_cell_restore_pmds,
10369 + .ctxswout_thread = pfm_cell_ctxswout_thread,
10370 + .load_context = pfm_cell_load_context,
10371 + .unload_context = pfm_cell_unload_context,
10372 +};
10373 +
10374 +static struct pfm_pmu_config pfm_cell_pmu_conf = { /* description passed to pfm_pmu_register() at module init */
10375 + .pmu_name = "Cell",
10376 + .version = "0.1",
10377 + .counter_width = 32, /* pfm_cell_load_context() rejects overflow interrupts with any other width */
10378 + .pmd_desc = pfm_cell_pmd_desc,
10379 + .pmc_desc = pfm_cell_pmc_desc,
10380 + .num_pmc_entries = PFM_PM_NUM_PMCS,
10381 + .num_pmd_entries = PFM_PM_NUM_PMDS,
10382 + .probe_pmu = pfm_cell_probe_pmu,
10383 + .pmu_info = &pfm_cell_pmu_info,
10384 + .flags = PFM_PMU_BUILTIN_FLAG,
10385 + .owner = THIS_MODULE,
10386 +};
10387 +
10388 +/**
10389 + * pfm_cell_platform_probe
10390 + *
10391 + * If we're on a system without the firmware rtas call available, set up the
10392 + * PMC write-checker for all the pmX_event control registers.
10393 + **/
10394 +static void pfm_cell_platform_probe(void)
10395 +{
10396 + if (machine_is(celleb)) {
10397 + int cnum;
10398 + pfm_cell_pmu_conf.pmc_write_check = pfm_cell_pmc_check;
10399 + for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) /* flag pmc_desc[NR_CTRS .. 2*NR_CTRS-1] with PFM_REG_WC */
10400 + pfm_cell_pmc_desc[cnum].type |= PFM_REG_WC;
10401 + }
10402 +
10403 + if (machine_is(ps3)) /* select the platform accessor table used through ->platform_info */
10404 + pfm_cell_pmu_info.platform_info = &ps3_platform_pmu_info;
10405 + else
10406 + pfm_cell_pmu_info.platform_info = &native_platform_pmu_info;
10407 +}
10408 +
10409 +static int __init pfm_cell_pmu_init_module(void)
10410 +{
10411 + pfm_cell_platform_probe(); /* must run first: fills in platform_info before registration */
10412 + return pfm_pmu_register(&pfm_cell_pmu_conf); /* the registration result is the module init result */
10413 +}
10414 +
10415 +static void __exit pfm_cell_pmu_cleanup_module(void)
10416 +{
10417 + pfm_pmu_unregister(&pfm_cell_pmu_conf); /* undo the registration done at module init */
10418 +}
10419 +
10420 +module_init(pfm_cell_pmu_init_module);
10421 +module_exit(pfm_cell_pmu_cleanup_module);
10422 diff --git a/arch/powerpc/perfmon/perfmon_power4.c b/arch/powerpc/perfmon/perfmon_power4.c
10423 new file mode 100644
10424 index 0000000..eba9e8c
10425 --- /dev/null
10426 +++ b/arch/powerpc/perfmon/perfmon_power4.c
10427 @@ -0,0 +1,309 @@
10428 +/*
10429 + * This file contains the POWER4 PMU register description tables
10430 + * and pmc checker used by perfmon.c.
10431 + *
10432 + * Copyright (c) 2007, IBM Corporation.
10433 + *
10434 + * Based on a simple modification of perfmon_power5.c for POWER4 by
10435 + * Corey Ashford <cjashfor@us.ibm.com>.
10436 + *
10437 + * This program is free software; you can redistribute it and/or
10438 + * modify it under the terms of version 2 of the GNU General Public
10439 + * License as published by the Free Software Foundation.
10440 + *
10441 + * This program is distributed in the hope that it will be useful,
10442 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10443 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10444 + * General Public License for more details.
10445 + *
10446 + * You should have received a copy of the GNU General Public License
10447 + * along with this program; if not, write to the Free Software
10448 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
10449 + * 02111-1307 USA
10450 + */
10451 +#include <linux/module.h>
10452 +#include <linux/perfmon_kern.h>
10453 +
10454 +MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
10455 +MODULE_DESCRIPTION("POWER4 PMU description table");
10456 +MODULE_LICENSE("GPL");
10457 +
10458 +static struct pfm_regmap_desc pfm_power4_pmc_desc[] = { /* PMC index -> SPR: 0 = MMCR0, 1 = MMCR1, 2 = MMCRA */
10459 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
10460 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
10461 +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
10462 +};
10463 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power4_pmc_desc)
10464 +
10465 +/* The TB register is read-only (this table has no PURR entry). Note that the TB register
10466 + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
10467 + * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
10468 + */
10469 +static struct pfm_regmap_desc pfm_power4_pmd_desc[] = {
10470 +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
10471 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
10472 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
10473 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
10474 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
10475 +/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
10476 +/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
10477 +/* pmd7 */ PMD_D(PFM_REG_C, "PMC7", SPRN_PMC7),
10478 +/* pmd8 */ PMD_D(PFM_REG_C, "PMC8", SPRN_PMC8)
10479 +};
10480 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power4_pmd_desc)
10481 +
10482 +static int pfm_power4_probe_pmu(void)
10483 +{
10484 + unsigned long pvr = mfspr(SPRN_PVR);
10485 + int ver = PVR_VER(pvr);
10486 +
10487 + if ((ver == PV_POWER4) || (ver == PV_POWER4p))
10488 + return 0; /* PVR version matches PV_POWER4 or PV_POWER4p */
10489 +
10490 + return -1; /* any other processor version is rejected */
10491 +}
10492 +
10493 +static void pfm_power4_write_pmc(unsigned int cnum, u64 value)
10494 +{
10495 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { /* dispatch on the SPR number recorded for PMC @cnum */
10496 + case SPRN_MMCR0:
10497 + mtspr(SPRN_MMCR0, value);
10498 + break;
10499 + case SPRN_MMCR1:
10500 + mtspr(SPRN_MMCR1, value);
10501 + break;
10502 + case SPRN_MMCRA:
10503 + mtspr(SPRN_MMCRA, value);
10504 + break;
10505 + default:
10506 + BUG(); /* hw_addr is not one of the three MMCR registers */
10507 + }
10508 +}
10509 +
10510 +static void pfm_power4_write_pmd(unsigned int cnum, u64 value)
10511 +{
10512 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; /* applied to every counter value before it is written */
10513 +
10514 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { /* dispatch on the SPR number recorded for PMD @cnum */
10515 + case SPRN_PMC1:
10516 + mtspr(SPRN_PMC1, value & ovfl_mask);
10517 + break;
10518 + case SPRN_PMC2:
10519 + mtspr(SPRN_PMC2, value & ovfl_mask);
10520 + break;
10521 + case SPRN_PMC3:
10522 + mtspr(SPRN_PMC3, value & ovfl_mask);
10523 + break;
10524 + case SPRN_PMC4:
10525 + mtspr(SPRN_PMC4, value & ovfl_mask);
10526 + break;
10527 + case SPRN_PMC5:
10528 + mtspr(SPRN_PMC5, value & ovfl_mask);
10529 + break;
10530 + case SPRN_PMC6:
10531 + mtspr(SPRN_PMC6, value & ovfl_mask);
10532 + break;
10533 + case SPRN_PMC7:
10534 + mtspr(SPRN_PMC7, value & ovfl_mask);
10535 + break;
10536 + case SPRN_PMC8:
10537 + mtspr(SPRN_PMC8, value & ovfl_mask);
10538 + break;
10539 + case SPRN_TBRL:
10540 + case SPRN_PURR:
10541 + /* Ignore writes to read-only registers. */
10542 + break;
10543 + default:
10544 + BUG(); /* hw_addr is not a register handled above */
10545 + }
10546 +}
10547 +
10548 +static u64 pfm_power4_read_pmd(unsigned int cnum)
10549 +{
10550 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { /* dispatch on the SPR number recorded for PMD @cnum */
10551 + case SPRN_PMC1:
10552 + return mfspr(SPRN_PMC1);
10553 + case SPRN_PMC2:
10554 + return mfspr(SPRN_PMC2);
10555 + case SPRN_PMC3:
10556 + return mfspr(SPRN_PMC3);
10557 + case SPRN_PMC4:
10558 + return mfspr(SPRN_PMC4);
10559 + case SPRN_PMC5:
10560 + return mfspr(SPRN_PMC5);
10561 + case SPRN_PMC6:
10562 + return mfspr(SPRN_PMC6);
10563 + case SPRN_PMC7:
10564 + return mfspr(SPRN_PMC7);
10565 + case SPRN_PMC8:
10566 + return mfspr(SPRN_PMC8);
10567 + case SPRN_TBRL:
10568 + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); /* TB is assembled from two separate reads: TBRU, then TBRL */
10569 + case SPRN_PURR:
10570 + if (cpu_has_feature(CPU_FTR_PURR))
10571 + return mfspr(SPRN_PURR);
10572 + else
10573 + return 0; /* PURR reads as 0 when the CPU lacks CPU_FTR_PURR */
10574 + default:
10575 + BUG(); /* hw_addr is not a register handled above */
10576 + }
10577 +}
10578 +
10579 +/* forward decl */
10580 +static void pfm_power4_disable_counters(struct pfm_context *ctx,
10581 + struct pfm_event_set *set);
10582 +
10583 +/**
10584 + * pfm_power4_enable_counters
10585 + *
10586 + **/
10587 +static void pfm_power4_enable_counters(struct pfm_context *ctx,
10588 + struct pfm_event_set *set)
10589 +{
10590 + unsigned int i, max_pmc;
10591 +
10592 + /* Make sure the counters are disabled before touching the other
10593 + control registers */
10594 + pfm_power4_disable_counters(ctx, set);
10595 +
10596 + max_pmc = ctx->regs.max_pmc;
10597 +
10598 + /* Write MMCR0 last, and a fairly easy way to do this is to write
10599 + the registers in the reverse order */
10600 + for (i = max_pmc; i != 0; i--) /* i runs max_pmc..1, so the index i - 1 ends at PMC 0 */
10601 + if (test_bit(i - 1, set->used_pmcs))
10602 + pfm_power4_write_pmc(i - 1, set->pmcs[i - 1]);
10603 +}
10604 +
10605 +/**
10606 + * pfm_power4_disable_counters
10607 + *
10608 + **/
10609 +static void pfm_power4_disable_counters(struct pfm_context *ctx,
10610 + struct pfm_event_set *set)
10611 +{
10612 + /* Set the Freeze Counters bit (read-modify-write of MMCR0; ctx and set are unused) */
10613 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
10614 + asm volatile ("sync");
10615 +}
10616 +
10617 +/**
10618 + * pfm_power4_get_ovfl_pmds
10619 + *
10620 + * Determine which counters in this set have overflowed and fill in the
10621 + * set->povfl_pmds mask and set->npend_ovfls count.
10622 + **/
10623 +static void pfm_power4_get_ovfl_pmds(struct pfm_context *ctx,
10624 + struct pfm_event_set *set)
10625 +{
10626 + unsigned int i;
10627 + unsigned int max_pmd = ctx->regs.max_intr_pmd;
10628 + u64 *used_pmds = set->used_pmds;
10629 + u64 *cntr_pmds = ctx->regs.cnt_pmds;
10630 + u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* 64-bit shift: "1 << 31" overflows int and sign-extends into the mask */
10631 + u64 new_val, mask[PFM_PMD_BV];
10632 +
10633 + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
10634 + cast_ulp(used_pmds), max_pmd); /* mask = counting PMDs that this set uses */
10635 +
10636 + for (i = 0; i < max_pmd; i++) {
10637 + if (test_bit(i, mask)) {
10638 + new_val = pfm_power4_read_pmd(i);
10639 + if (new_val & width_mask) { /* bit counter_width set: counter overflowed */
10640 + set_bit(i, set->povfl_pmds);
10641 + set->npend_ovfls++; /* accumulates; not reset in this function */
10642 + }
10643 + }
10644 + }
10645 +}
10646 +
10647 +static void pfm_power4_irq_handler(struct pt_regs *regs,
10648 + struct pfm_context *ctx)
10649 +{
10650 + u32 mmcr0;
10651 +
10652 + /* Disable the counters (set the freeze bit) to not pollute
10653 + * the counts.
10654 + */
10655 + mmcr0 = mfspr(SPRN_MMCR0);
10656 + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
10657 +
10658 + /* Set the PMM bit (see comment below). */
10659 + mtmsrd(mfmsr() | MSR_PMM);
10660 +
10661 + pfm_interrupt_handler(instruction_pointer(regs), regs);
10662 +
10663 + mmcr0 = mfspr(SPRN_MMCR0); /* re-read MMCR0 after the core handler has run */
10664 +
10665 + /*
10666 + * Reset the perfmon trigger if
10667 + * not in masking mode.
10668 + */
10669 + if (ctx->state != PFM_CTX_MASKED)
10670 + mmcr0 |= MMCR0_PMXE;
10671 +
10672 + /*
10673 + * We must clear the PMAO bit on some (GQ) chips. Just do it
10674 + * all the time.
10675 + */
10676 + mmcr0 &= ~MMCR0_PMAO;
10677 +
10678 + /*
10679 + * Now clear the freeze bit, counting will not start until we
10680 + * rfid from this exception, because only at that point will
10681 + * the PMM bit be cleared.
10682 + */
10683 + mmcr0 &= ~MMCR0_FC;
10684 + mtspr(SPRN_MMCR0, mmcr0);
10685 +}
10686 +
10687 +static void pfm_power4_resend_irq(struct pfm_context *ctx)
10688 +{
10689 + /*
10690 + * Assert the PMAO bit to cause a PMU interrupt. Make sure we
10691 + * trigger the edge detection circuitry for PMAO
10692 + */
10693 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); /* first drive PMAO low ... */
10694 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); /* ... then high, producing the edge */
10695 +}
10696 +
10697 +struct pfm_arch_pmu_info pfm_power4_pmu_info = { /* POWER4 callbacks, referenced via pfm_power4_pmu_conf.pmu_info */
10698 + .pmu_style = PFM_POWERPC_PMU_POWER4,
10699 + .write_pmc = pfm_power4_write_pmc,
10700 + .write_pmd = pfm_power4_write_pmd,
10701 + .read_pmd = pfm_power4_read_pmd,
10702 + .irq_handler = pfm_power4_irq_handler,
10703 + .get_ovfl_pmds = pfm_power4_get_ovfl_pmds,
10704 + .enable_counters = pfm_power4_enable_counters,
10705 + .disable_counters = pfm_power4_disable_counters,
10706 + .resend_irq = pfm_power4_resend_irq
10707 +};
10708 +
10709 +/*
10710 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
10711 + */
10712 +static struct pfm_pmu_config pfm_power4_pmu_conf = {
10713 + .pmu_name = "POWER4",
10714 + .counter_width = 31, /* pfm_power4_get_ovfl_pmds() tests bit 31 of a counter to detect overflow */
10715 + .pmd_desc = pfm_power4_pmd_desc,
10716 + .pmc_desc = pfm_power4_pmc_desc,
10717 + .num_pmc_entries = PFM_PM_NUM_PMCS,
10718 + .num_pmd_entries = PFM_PM_NUM_PMDS,
10719 + .probe_pmu = pfm_power4_probe_pmu,
10720 + .pmu_info = &pfm_power4_pmu_info,
10721 + .flags = PFM_PMU_BUILTIN_FLAG,
10722 + .owner = THIS_MODULE
10723 +};
10724 +
10725 +static int __init pfm_power4_pmu_init_module(void)
10726 +{
10727 + return pfm_pmu_register(&pfm_power4_pmu_conf); /* the registration result is the module init result */
10728 +}
10729 +
10730 +static void __exit pfm_power4_pmu_cleanup_module(void)
10731 +{
10732 + pfm_pmu_unregister(&pfm_power4_pmu_conf); /* undo the registration done at module init */
10733 +}
10734 +
10735 +module_init(pfm_power4_pmu_init_module);
10736 +module_exit(pfm_power4_pmu_cleanup_module);
10737 diff --git a/arch/powerpc/perfmon/perfmon_power5.c b/arch/powerpc/perfmon/perfmon_power5.c
10738 new file mode 100644
10739 index 0000000..f4bb1ac
10740 --- /dev/null
10741 +++ b/arch/powerpc/perfmon/perfmon_power5.c
10742 @@ -0,0 +1,326 @@
10743 +/*
10744 + * This file contains the POWER5 PMU register description tables
10745 + * and pmc checker used by perfmon.c.
10746 + *
10747 + * Copyright (c) 2005 David Gibson, IBM Corporation.
10748 + *
10749 + * Based on perfmon_p6.c:
10750 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
10751 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
10752 + *
10753 + * This program is free software; you can redistribute it and/or
10754 + * modify it under the terms of version 2 of the GNU General Public
10755 + * License as published by the Free Software Foundation.
10756 + *
10757 + * This program is distributed in the hope that it will be useful,
10758 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
10759 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10760 + * General Public License for more details.
10761 + *
10762 + * You should have received a copy of the GNU General Public License
10763 + * along with this program; if not, write to the Free Software
10764 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
10765 + * 02111-1307 USA
10766 + */
10767 +#include <linux/module.h>
10768 +#include <linux/perfmon_kern.h>
10769 +
10770 +MODULE_AUTHOR("David Gibson <dwg@au1.ibm.com>");
10771 +MODULE_DESCRIPTION("POWER5 PMU description table");
10772 +MODULE_LICENSE("GPL");
10773 +
10774 +static struct pfm_regmap_desc pfm_power5_pmc_desc[] = { /* PMC index -> SPR: 0 = MMCR0, 1 = MMCR1, 2 = MMCRA */
10775 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
10776 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
10777 +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
10778 +};
10779 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power5_pmc_desc)
10780 +
10781 +/* The TB and PURR registers are read-only. Also, note that the TB register
10782 + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
10783 + * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
10784 + */
10785 +static struct pfm_regmap_desc pfm_power5_pmd_desc[] = {
10786 +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
10787 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
10788 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
10789 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
10790 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
10791 +/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
10792 +/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
10793 +/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
10794 +};
10795 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power5_pmd_desc) /* 8 entries: TB, PMC1-PMC6, PURR */
10796 +
10797 +/* forward decl */
10798 +static void pfm_power5_disable_counters(struct pfm_context *ctx,
10799 + struct pfm_event_set *set);
10800 +
10801 +static int pfm_power5_probe_pmu(void)
10802 +{
10803 + unsigned long pvr = mfspr(SPRN_PVR);
10804 +
10805 + switch (PVR_VER(pvr)) {
10806 + case PV_POWER5:
10807 + return 0;
10808 + case PV_POWER5p:
10809 + return (PVR_REV(pvr) < 0x300) ? 0 : -1; /* PV_POWER5p is accepted only for revisions below 0x300 */
10810 + default:
10811 + return -1; /* any other processor version is rejected */
10812 + }
10813 +}
10814 +
10815 +static void pfm_power5_write_pmc(unsigned int cnum, u64 value)
10816 +{
10817 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { /* dispatch on the SPR number recorded for PMC @cnum */
10818 + case SPRN_MMCR0:
10819 + mtspr(SPRN_MMCR0, value);
10820 + break;
10821 + case SPRN_MMCR1:
10822 + mtspr(SPRN_MMCR1, value);
10823 + break;
10824 + case SPRN_MMCRA:
10825 + mtspr(SPRN_MMCRA, value);
10826 + break;
10827 + default:
10828 + BUG(); /* hw_addr is not one of the three MMCR registers */
10829 + }
10830 +}
10831 +
10832 +static void pfm_power5_write_pmd(unsigned int cnum, u64 value)
10833 +{
10834 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
10835 +
10836 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
10837 + case SPRN_PMC1:
10838 + mtspr(SPRN_PMC1, value & ovfl_mask);
10839 + break;
10840 + case SPRN_PMC2:
10841 + mtspr(SPRN_PMC2, value & ovfl_mask);
10842 + break;
10843 + case SPRN_PMC3:
10844 + mtspr(SPRN_PMC3, value & ovfl_mask);
10845 + break;
10846 + case SPRN_PMC4:
10847 + mtspr(SPRN_PMC4, value & ovfl_mask);
10848 + break;
10849 + case SPRN_PMC5:
10850 + mtspr(SPRN_PMC5, value & ovfl_mask);
10851 + break;
10852 + case SPRN_PMC6:
10853 + mtspr(SPRN_PMC6, value & ovfl_mask);
10854 + break;
10855 + case SPRN_TBRL:
10856 + case SPRN_PURR:
10857 + /* Ignore writes to read-only registers. */
10858 + break;
10859 + default:
10860 + BUG();
10861 + }
10862 +}
10863 +
10864 +static u64 pfm_power5_read_pmd(unsigned int cnum)
10865 +{
10866 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
10867 + case SPRN_PMC1:
10868 + return mfspr(SPRN_PMC1);
10869 + case SPRN_PMC2:
10870 + return mfspr(SPRN_PMC2);
10871 + case SPRN_PMC3:
10872 + return mfspr(SPRN_PMC3);
10873 + case SPRN_PMC4:
10874 + return mfspr(SPRN_PMC4);
10875 + case SPRN_PMC5:
10876 + return mfspr(SPRN_PMC5);
10877 + case SPRN_PMC6:
10878 + return mfspr(SPRN_PMC6);
10879 + case SPRN_TBRL:
10880 + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
10881 + case SPRN_PURR:
10882 + if (cpu_has_feature(CPU_FTR_PURR))
10883 + return mfspr(SPRN_PURR);
10884 + else
10885 + return 0;
10886 + default:
10887 + BUG();
10888 + }
10889 +}
10890 +
10891 +/**
10892 + * pfm_power5_enable_counters
10893 + *
10894 + **/
10895 +static void pfm_power5_enable_counters(struct pfm_context *ctx,
10896 + struct pfm_event_set *set)
10897 +{
10898 + unsigned int i, max_pmc;
10899 +
10900 + /*
10901 + * Make sure the counters are disabled before touching the
10902 + * other control registers
10903 + */
10904 + pfm_power5_disable_counters(ctx, set);
10905 +
10906 + max_pmc = ctx->regs.max_pmc;
10907 +
10908 + /*
10909 + * Write MMCR0 last, and a fairly easy way to do
10910 + * this is to write the registers in the reverse
10911 + * order
10912 + */
10913 + for (i = max_pmc; i != 0; i--)
10914 + if (test_bit(i - 1, set->used_pmcs))
10915 + pfm_power5_write_pmc(i - 1, set->pmcs[i - 1]);
10916 +}
10917 +
10918 +/**
10919 + * pfm_power5_disable_counters
10920 + *
10921 + * Freeze all counters by setting the Freeze Counters bit in MMCR0.
10922 + **/
10923 +static void pfm_power5_disable_counters(struct pfm_context *ctx,
10924 + struct pfm_event_set *set)
10925 +{
10926 + /* Set the Freeze Counters bit */
10927 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
10928 + asm volatile ("sync");
10929 +}
10930 +
10931 +/**
10932 + * pfm_power5_get_ovfl_pmds
10933 + *
10934 + * Determine which counters in this set have overflowed and fill in the
10935 + * set->povfl_pmds mask and set->npend_ovfls count.
10936 + **/
10937 +static void pfm_power5_get_ovfl_pmds(struct pfm_context *ctx,
10938 + struct pfm_event_set *set)
10939 +{
10940 + unsigned int i;
10941 + unsigned int max = ctx->regs.max_intr_pmd;
10942 + u64 *used_pmds = set->used_pmds;
10943 + u64 *intr_pmds = ctx->regs.intr_pmds;
10944 + u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* overflow bit */
10945 + u64 new_val, mask[PFM_PMD_BV];
10946 +
10947 + bitmap_and(cast_ulp(mask), cast_ulp(intr_pmds),
10948 + cast_ulp(used_pmds), max);
10949 + /*
10950 + * If either PMC5 or PMC6 are not being used, just zero out the unused
10951 + * ones so that they won't interrupt again for another 2^31 counts.
10952 + * Note that if no other counters overflowed, set->npend_ovfls will
10953 + * be zero upon returning from this call (i.e. a spurious
10954 + * interrupt), but that should be ok.
10955 + *
10956 + * If neither PMC5 nor PMC6 are used, the counters should be frozen
10957 + * via MMCR0_FC5_6 and zeroed out.
10958 + *
10959 + * If both PMC5 and PMC6 are used, they can be handled correctly by
10960 + * the loop that follows.
10961 + */
10962 +
10963 + if (!test_bit(5, cast_ulp(used_pmds)))
10964 + mtspr(SPRN_PMC5, 0);
10965 + if (!test_bit(6, cast_ulp(used_pmds)))
10966 + mtspr(SPRN_PMC6, 0);
10967 +
10968 + for (i = 0; i < max; i++) {
10969 + if (test_bit(i, mask)) {
10970 + new_val = pfm_power5_read_pmd(i);
10971 + if (new_val & width_mask) {
10972 + set_bit(i, set->povfl_pmds);
10973 + set->npend_ovfls++;
10974 + }
10975 + }
10976 + }
10977 +}
10978 +
10979 +static void pfm_power5_irq_handler(struct pt_regs *regs,
10980 + struct pfm_context *ctx)
10981 +{
10982 + u32 mmcr0;
10983 +
10984 + /* Disable the counters (set the freeze bit) to not pollute
10985 + * the counts.
10986 + */
10987 + mmcr0 = mfspr(SPRN_MMCR0);
10988 + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
10989 +
10990 + /* Set the PMM bit (see comment below). */
10991 + mtmsrd(mfmsr() | MSR_PMM);
10992 +
10993 + pfm_interrupt_handler(instruction_pointer(regs), regs);
10994 +
10995 + mmcr0 = mfspr(SPRN_MMCR0);
10996 +
10997 + /*
10998 + * Reset the perfmon trigger if
10999 + * not in masking mode.
11000 + */
11001 + if (ctx->state != PFM_CTX_MASKED)
11002 + mmcr0 |= MMCR0_PMXE;
11003 +
11004 + /*
11005 + * We must clear the PMAO bit on some (GQ) chips. Just do it
11006 + * all the time.
11007 + */
11008 + mmcr0 &= ~MMCR0_PMAO;
11009 +
11010 + /*
11011 + * Now clear the freeze bit, counting will not start until we
11012 + * rfid from this exception, because only at that point will
11013 + * the PMM bit be cleared.
11014 + */
11015 + mmcr0 &= ~MMCR0_FC;
11016 + mtspr(SPRN_MMCR0, mmcr0);
11017 +}
11018 +
11019 +static void pfm_power5_resend_irq(struct pfm_context *ctx)
11020 +{
11021 + /*
11022 + * Assert the PMAO bit to cause a PMU interrupt. Make sure we
11023 + * trigger the edge detection circuitry for PMAO
11024 + */
11025 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
11026 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
11027 +}
11028 +
11029 +struct pfm_arch_pmu_info pfm_power5_pmu_info = {
11030 + .pmu_style = PFM_POWERPC_PMU_POWER5,
11031 + .write_pmc = pfm_power5_write_pmc,
11032 + .write_pmd = pfm_power5_write_pmd,
11033 + .read_pmd = pfm_power5_read_pmd,
11034 + .irq_handler = pfm_power5_irq_handler,
11035 + .get_ovfl_pmds = pfm_power5_get_ovfl_pmds,
11036 + .enable_counters = pfm_power5_enable_counters,
11037 + .disable_counters = pfm_power5_disable_counters,
11038 + .resend_irq = pfm_power5_resend_irq
11039 +};
11040 +
11041 +/*
11042 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
11043 + */
11044 +static struct pfm_pmu_config pfm_power5_pmu_conf = {
11045 + .pmu_name = "POWER5",
11046 + .counter_width = 31,
11047 + .pmd_desc = pfm_power5_pmd_desc,
11048 + .pmc_desc = pfm_power5_pmc_desc,
11049 + .num_pmc_entries = PFM_PM_NUM_PMCS,
11050 + .num_pmd_entries = PFM_PM_NUM_PMDS,
11051 + .probe_pmu = pfm_power5_probe_pmu,
11052 + .pmu_info = &pfm_power5_pmu_info,
11053 + .flags = PFM_PMU_BUILTIN_FLAG,
11054 + .owner = THIS_MODULE
11055 +};
11056 +
11057 +static int __init pfm_power5_pmu_init_module(void)
11058 +{
11059 + return pfm_pmu_register(&pfm_power5_pmu_conf);
11060 +}
11061 +
11062 +static void __exit pfm_power5_pmu_cleanup_module(void)
11063 +{
11064 + pfm_pmu_unregister(&pfm_power5_pmu_conf);
11065 +}
11066 +
11067 +module_init(pfm_power5_pmu_init_module);
11068 +module_exit(pfm_power5_pmu_cleanup_module);
11069 diff --git a/arch/powerpc/perfmon/perfmon_power6.c b/arch/powerpc/perfmon/perfmon_power6.c
11070 new file mode 100644
11071 index 0000000..7882feb
11072 --- /dev/null
11073 +++ b/arch/powerpc/perfmon/perfmon_power6.c
11074 @@ -0,0 +1,520 @@
11075 +/*
11076 + * This file contains the POWER6 PMU register description tables
11077 + * and pmc checker used by perfmon.c.
11078 + *
11079 + * Copyright (c) 2007, IBM Corporation
11080 + *
11081 + * Based on perfmon_power5.c, and written by Carl Love <carll@us.ibm.com>
11082 + * and Kevin Corry <kevcorry@us.ibm.com>. Some fixes and refinement by
11083 + * Corey Ashford <cjashfor@us.ibm.com>
11084 + *
11085 + * This program is free software; you can redistribute it and/or
11086 + * modify it under the terms of version 2 of the GNU General Public
11087 + * License as published by the Free Software Foundation.
11088 + *
11089 + * This program is distributed in the hope that it will be useful,
11090 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11091 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11092 + * General Public License for more details.
11093 + *
11094 + * You should have received a copy of the GNU General Public License
11095 + * along with this program; if not, write to the Free Software
11096 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
11097 + * 02111-1307 USA
11098 + */
11099 +#include <linux/module.h>
11100 +#include <linux/perfmon_kern.h>
11101 +
11102 +MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
11103 +MODULE_DESCRIPTION("POWER6 PMU description table");
11104 +MODULE_LICENSE("GPL");
11105 +
11106 +static struct pfm_regmap_desc pfm_power6_pmc_desc[] = {
11107 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0),
11108 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
11109 +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
11110 +};
11111 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power6_pmc_desc)
11112 +#define PFM_DELTA_TB 10000 /* Not real registers */
11113 +#define PFM_DELTA_PURR 10001
11114 +
11115 +/*
11116 + * counters wrap to zero at transition from 2^32-1 to 2^32. Note:
11117 + * interrupt generated at transition from 2^31-1 to 2^31
11118 + */
11119 +#define OVERFLOW_VALUE 0x100000000UL
11120 +
11121 +/* The TB and PURR registers are read-only. Also, note that the TB register
11122 + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
11123 + * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
11124 + */
11125 +static struct pfm_regmap_desc pfm_power6_pmd_desc[] = {
11126 + /* On POWER 6 PMC5 and PMC6 are not writable, they do not
11127 + * generate interrupts, and do not qualify their counts
11128 + * based on problem mode, supervisor mode or hypervisor mode.
11129 + * These two counters are implemented as virtual counters
11130 + * to make them appear to work like the other counters. A
11131 + * kernel timer is used to sample the real PMC5 and PMC6 and
11132 + * update the virtual counters.
11133 + */
11134 +/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
11135 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
11136 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
11137 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
11138 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
11139 +/* pmd5 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC5", SPRN_PMC5),
11140 +/* pmd6 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC6", SPRN_PMC6),
11141 +/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
11142 +/* delta tb */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_TB", PFM_DELTA_TB),
11143 +/* delta purr */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_PURR", PFM_DELTA_PURR),
11144 +};
11145 +
11146 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc)
11147 +
11148 +u32 pmc5_start_save[NR_CPUS];
11149 +u32 pmc6_start_save[NR_CPUS];
11150 +
11151 +static struct timer_list pmc5_6_update[NR_CPUS];
11152 +u64 enable_cntrs_cnt;
11153 +u64 disable_cntrs_cnt;
11154 +u64 call_delta;
11155 +u64 pm5_6_interrupt;
11156 +u64 pm1_4_interrupt;
11157 +/* need ctx_arch for kernel timer. Can't get it in context of the kernel
11158 + * timer.
11159 + */
11160 +struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS];
11161 +long int update_time;
11162 +
11163 +static void delta(int cpu_num, struct pfm_arch_context *ctx_arch)
11164 +{
11165 + u32 tmp5, tmp6;
11166 +
11167 + call_delta++;
11168 +
11169 + tmp5 = (u32) mfspr(SPRN_PMC5);
11170 + tmp6 = (u32) mfspr(SPRN_PMC6);
11171 +
11172 + /*
11173 + * The following difference calculation relies on 32-bit modular
11174 + * arithmetic for the deltas to come out correct (especially in the
11175 + * presence of a 32-bit counter wrap).
11176 + */
11177 + ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]);
11178 + ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]);
11179 +
11180 + pmc5_start_save[cpu_num] = tmp5;
11181 + pmc6_start_save[cpu_num] = tmp6;
11182 +
11183 + return;
11184 +}
11185 +
11186 +
11187 +static void pmc5_6_updater(unsigned long cpu_num)
11188 +{
11189 + /* update the virtual pmd 5 and pmd 6 counters */
11190 +
11191 + delta(cpu_num, pmc5_6_ctx_arch[cpu_num]);
11192 + mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time);
11193 +}
11194 +
11195 +
11196 +static int pfm_power6_probe_pmu(void)
11197 +{
11198 + unsigned long pvr = mfspr(SPRN_PVR);
11199 +
11200 + switch (PVR_VER(pvr)) {
11201 + case PV_POWER6:
11202 + return 0;
11203 + case PV_POWER5p:
11204 + /* If this is a POWER5+ and the revision is less than 0x300,
11205 + don't treat it as a POWER6. */
11206 + return (PVR_REV(pvr) < 0x300) ? -1 : 0;
11207 + default:
11208 + return -1;
11209 + }
11210 +}
11211 +
11212 +static void pfm_power6_write_pmc(unsigned int cnum, u64 value)
11213 +{
11214 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
11215 + case SPRN_MMCR0:
11216 + mtspr(SPRN_MMCR0, value);
11217 + break;
11218 + case SPRN_MMCR1:
11219 + mtspr(SPRN_MMCR1, value);
11220 + break;
11221 + case SPRN_MMCRA:
11222 + mtspr(SPRN_MMCRA, value);
11223 + break;
11224 + default:
11225 + BUG();
11226 + }
11227 +}
11228 +
11229 +static void pfm_power6_write_pmd(unsigned int cnum, u64 value)
11230 +{
11231 + /* On POWER 6 PMC5 and PMC6 are implemented as
11232 + * virtual counters. See comment in pfm_power6_pmd_desc
11233 + * definition.
11234 + */
11235 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
11236 +
11237 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11238 + case SPRN_PMC1:
11239 + mtspr(SPRN_PMC1, value & ovfl_mask);
11240 + break;
11241 + case SPRN_PMC2:
11242 + mtspr(SPRN_PMC2, value & ovfl_mask);
11243 + break;
11244 + case SPRN_PMC3:
11245 + mtspr(SPRN_PMC3, value & ovfl_mask);
11246 + break;
11247 + case SPRN_PMC4:
11248 + mtspr(SPRN_PMC4, value & ovfl_mask);
11249 + break;
11250 + case SPRN_TBRL:
11251 + case SPRN_PURR:
11252 + /* Ignore writes to read-only registers. */
11253 + break;
11254 + default:
11255 + BUG();
11256 + }
11257 +}
11258 +
11259 +static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum)
11260 +{
11261 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
11262 + int cpu_num = smp_processor_id();
11263 +
11264 + /* On POWER 6 PMC5 and PMC6 are implemented as
11265 + * virtual counters. See comment in pfm_power6_pmd_desc
11266 + * definition.
11267 + */
11268 +
11269 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11270 + case SPRN_PMC5:
11271 + return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]);
11272 + break;
11273 +
11274 + case SPRN_PMC6:
11275 + return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]);
11276 + break;
11277 +
11278 + case PFM_DELTA_TB:
11279 + return ctx_arch->delta_tb
11280 + + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
11281 + - ctx_arch->delta_tb_start;
11282 + break;
11283 +
11284 + case PFM_DELTA_PURR:
11285 + return ctx_arch->delta_purr
11286 + + mfspr(SPRN_PURR)
11287 + - ctx_arch->delta_purr_start;
11288 + break;
11289 +
11290 + default:
11291 + BUG();
11292 + }
11293 +}
11294 +
11295 +void pfm_power6_swrite(struct pfm_context *ctx, unsigned int cnum,
11296 + u64 val)
11297 +{
11298 + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
11299 + int cpu_num = smp_processor_id();
11300 +
11301 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11302 + case SPRN_PMC5:
11303 + pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
11304 + ctx_arch->powergs_pmc5 = val;
11305 + break;
11306 +
11307 + case SPRN_PMC6:
11308 + pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
11309 + ctx_arch->powergs_pmc6 = val;
11310 + break;
11311 +
11312 + case PFM_DELTA_TB:
11313 + ctx_arch->delta_tb_start =
11314 + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL));
11315 + ctx_arch->delta_tb = val;
11316 + break;
11317 +
11318 + case PFM_DELTA_PURR:
11319 + ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
11320 + ctx_arch->delta_purr = val;
11321 + break;
11322 +
11323 + default:
11324 + BUG();
11325 + }
11326 +}
11327 +
11328 +static u64 pfm_power6_read_pmd(unsigned int cnum)
11329 +{
11330 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11331 + case SPRN_PMC1:
11332 + return mfspr(SPRN_PMC1);
11333 + case SPRN_PMC2:
11334 + return mfspr(SPRN_PMC2);
11335 + case SPRN_PMC3:
11336 + return mfspr(SPRN_PMC3);
11337 + case SPRN_PMC4:
11338 + return mfspr(SPRN_PMC4);
11339 + case SPRN_TBRL:
11340 + return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
11341 + case SPRN_PURR:
11342 + if (cpu_has_feature(CPU_FTR_PURR))
11343 + return mfspr(SPRN_PURR);
11344 + else
11345 + return 0;
11346 + default:
11347 + BUG();
11348 + }
11349 +}
11350 +
11351 +
11352 +/**
11353 + * pfm_power6_enable_counters
11354 + *
11355 + **/
11356 +static void pfm_power6_enable_counters(struct pfm_context *ctx,
11357 + struct pfm_event_set *set)
11358 +{
11359 +
11360 + unsigned int i, max_pmc;
11361 + int cpu_num = smp_processor_id();
11362 + struct pfm_arch_context *ctx_arch;
11363 +
11364 + enable_cntrs_cnt++;
11365 +
11366 + /* need the ctx passed down to the routine */
11367 + ctx_arch = pfm_ctx_arch(ctx);
11368 + max_pmc = ctx->regs.max_pmc;
11369 +
11370 + /* Write MMCR0 last, and a fairly easy way to do this is to write
11371 + the registers in the reverse order */
11372 + for (i = max_pmc; i != 0; i--)
11373 + if (test_bit(i - 1, set->used_pmcs))
11374 + pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]);
11375 +
11376 + /* save current free running HW event count */
11377 + pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
11378 + pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
11379 +
11380 + /* PURR is optional (CPU_FTR_PURR); the timebase is always sampled */
11381 + if (cpu_has_feature(CPU_FTR_PURR))
11382 + ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
11383 + else
11384 + ctx_arch->delta_purr_start = 0;
11385 + ctx_arch->delta_tb_start =
11386 + ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
11387 +
11388 + /* Start kernel timer for this cpu to periodically update
11389 + * the virtual counters.
11390 + */
11391 + init_timer(&pmc5_6_update[cpu_num]);
11392 + pmc5_6_update[cpu_num].function = pmc5_6_updater;
11393 + pmc5_6_update[cpu_num].data = (unsigned long) cpu_num;
11394 + pmc5_6_update[cpu_num].expires = jiffies + update_time;
11395 + /* context for this timer, timer will be removed if context
11396 + * is switched because the counters will be stopped first.
11397 + * NEEDS WORK, I think this is all ok, a little concerned about a
11398 + * race between the kernel timer going off right as the counters
11399 + * are being stopped and the context switching. Need to think
11400 + * about this.
11401 + */
11402 + pmc5_6_ctx_arch[cpu_num] = ctx_arch;
11403 + add_timer(&pmc5_6_update[cpu_num]);
11404 +}
11405 +
11406 +/**
11407 + * pfm_power6_disable_counters
11408 + *
11409 + **/
11410 +static void pfm_power6_disable_counters(struct pfm_context *ctx,
11411 + struct pfm_event_set *set)
11412 +{
11413 + struct pfm_arch_context *ctx_arch;
11414 + int cpu_num = smp_processor_id();
11415 +
11416 + disable_cntrs_cnt++;
11417 +
11418 + /* Set the Freeze Counters bit */
11419 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
11420 + asm volatile ("sync");
11421 +
11422 + /* delete kernel update timer */
11423 + del_timer_sync(&pmc5_6_update[cpu_num]);
11424 +
11425 + /* Update the virtual pmd 5 and 6 counters from the free running
11426 + * HW counters
11427 + */
11428 + ctx_arch = pfm_ctx_arch(ctx);
11429 + delta(cpu_num, ctx_arch);
11430 +
11431 + ctx_arch->delta_tb +=
11432 + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
11433 + - ctx_arch->delta_tb_start;
11434 +
11435 + ctx_arch->delta_purr += mfspr(SPRN_PURR)
11436 + - ctx_arch->delta_purr_start;
11437 +}
11438 +
11439 +/**
11440 + * pfm_power6_get_ovfl_pmds
11441 + *
11442 + * Determine which counters in this set have overflowed and fill in the
11443 + * set->povfl_pmds mask and set->npend_ovfls count.
11444 + **/
11445 +static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx,
11446 + struct pfm_event_set *set)
11447 +{
11448 + unsigned int i;
11449 + unsigned int first_intr_pmd = ctx->regs.first_intr_pmd;
11450 + unsigned int max_intr_pmd = ctx->regs.max_intr_pmd;
11451 + u64 *used_pmds = set->used_pmds;
11452 + u64 *cntr_pmds = ctx->regs.cnt_pmds;
11453 + u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* overflow bit */
11454 + u64 new_val, mask[PFM_PMD_BV];
11455 +
11456 + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd);
11457 +
11458 + /* max_intr_pmd is actually the last interrupting pmd register + 1 */
11459 + for (i = first_intr_pmd; i < max_intr_pmd; i++) {
11460 + if (test_bit(i, mask)) {
11461 + new_val = pfm_power6_read_pmd(i);
11462 + if (new_val & width_mask) {
11463 + set_bit(i, set->povfl_pmds);
11464 + set->npend_ovfls++;
11465 + }
11466 + }
11467 + }
11468 +}
11469 +
11470 +static void pfm_power6_irq_handler(struct pt_regs *regs,
11471 + struct pfm_context *ctx)
11472 +{
11473 + u32 mmcr0;
11474 + u64 mmcra;
11475 +
11476 + /* Disable the counters (set the freeze bit) to not pollute
11477 + * the counts.
11478 + */
11479 + mmcr0 = mfspr(SPRN_MMCR0);
11480 + mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
11481 + mmcra = mfspr(SPRN_MMCRA);
11482 +
11483 + /* Set the PMM bit (see comment below). */
11484 + mtmsrd(mfmsr() | MSR_PMM);
11485 +
11486 + pm1_4_interrupt++;
11487 +
11488 + pfm_interrupt_handler(instruction_pointer(regs), regs);
11489 +
11490 + mmcr0 = mfspr(SPRN_MMCR0);
11491 +
11492 + /*
11493 + * Reset the perfmon trigger if
11494 + * not in masking mode.
11495 + */
11496 + if (ctx->state != PFM_CTX_MASKED)
11497 + mmcr0 |= MMCR0_PMXE;
11498 +
11499 + /*
11500 + * Clear the PMU Alert Occurred bit
11501 + */
11502 + mmcr0 &= ~MMCR0_PMAO;
11503 +
11504 + /* Clear the appropriate bits in the MMCRA. */
11505 + mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER);
11506 + mtspr(SPRN_MMCRA, mmcra);
11507 +
11508 + /*
11509 + * Now clear the freeze bit, counting will not start until we
11510 + * rfid from this exception, because only at that point will
11511 + * the PMM bit be cleared.
11512 + */
11513 + mmcr0 &= ~MMCR0_FC;
11514 + mtspr(SPRN_MMCR0, mmcr0);
11515 +}
11516 +
11517 +static void pfm_power6_resend_irq(struct pfm_context *ctx)
11518 +{
11519 + /*
11520 + * Assert the PMAO bit to cause a PMU interrupt. Make sure we
11521 + * trigger the edge detection circuitry for PMAO
11522 + */
11523 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
11524 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
11525 +}
11526 +
11527 +struct pfm_arch_pmu_info pfm_power6_pmu_info = {
11528 + .pmu_style = PFM_POWERPC_PMU_POWER6,
11529 + .write_pmc = pfm_power6_write_pmc,
11530 + .write_pmd = pfm_power6_write_pmd,
11531 + .read_pmd = pfm_power6_read_pmd,
11532 + .irq_handler = pfm_power6_irq_handler,
11533 + .get_ovfl_pmds = pfm_power6_get_ovfl_pmds,
11534 + .enable_counters = pfm_power6_enable_counters,
11535 + .disable_counters = pfm_power6_disable_counters,
11536 + .resend_irq = pfm_power6_resend_irq
11537 +};
11538 +
11539 +/*
11540 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
11541 + */
11542 +static struct pfm_pmu_config pfm_power6_pmu_conf = {
11543 + .pmu_name = "POWER6",
11544 + .counter_width = 31,
11545 + .pmd_desc = pfm_power6_pmd_desc,
11546 + .pmc_desc = pfm_power6_pmc_desc,
11547 + .num_pmc_entries = PFM_PM_NUM_PMCS,
11548 + .num_pmd_entries = PFM_PM_NUM_PMDS,
11549 + .probe_pmu = pfm_power6_probe_pmu,
11550 + .pmu_info = &pfm_power6_pmu_info,
11551 + .pmd_sread = pfm_power6_sread,
11552 + .pmd_swrite = pfm_power6_swrite,
11553 + .flags = PFM_PMU_BUILTIN_FLAG,
11554 + .owner = THIS_MODULE
11555 +};
11556 +
11557 +static int __init pfm_power6_pmu_init_module(void)
11558 +{
11559 + int ret;
11560 + disable_cntrs_cnt = 0;
11561 + enable_cntrs_cnt = 0;
11562 + call_delta = 0;
11563 + pm5_6_interrupt = 0;
11564 + pm1_4_interrupt = 0;
11565 +
11566 + /* calculate the time for updating counters 5 and 6 */
11567 +
11568 + /*
11569 + * MAX_EVENT_RATE assumes a max instruction issue rate of 2
11570 + * instructions per clock cycle. Experience shows that this factor
11571 + * of 2 is more than adequate.
11572 + */
11573 +
11574 +# define MAX_EVENT_RATE (ppc_proc_freq * 2)
11575 +
11576 + /*
11577 + * Calculate the time, in jiffies, it takes for event counter 5 or
11578 + * 6 to completely wrap when counting at the max event rate, and
11579 + * then figure on sampling at twice that rate.
11580 + */
11581 + update_time = (((unsigned long)HZ * OVERFLOW_VALUE)
11582 + / ((unsigned long)MAX_EVENT_RATE)) / 2;
11583 +
11584 + ret = pfm_pmu_register(&pfm_power6_pmu_conf);
11585 + return ret;
11586 +}
11587 +
11588 +static void __exit pfm_power6_pmu_cleanup_module(void)
11589 +{
11590 + pfm_pmu_unregister(&pfm_power6_pmu_conf);
11591 +}
11592 +
11593 +module_init(pfm_power6_pmu_init_module);
11594 +module_exit(pfm_power6_pmu_cleanup_module);
11595 diff --git a/arch/powerpc/perfmon/perfmon_ppc32.c b/arch/powerpc/perfmon/perfmon_ppc32.c
11596 new file mode 100644
11597 index 0000000..76f0b84
11598 --- /dev/null
11599 +++ b/arch/powerpc/perfmon/perfmon_ppc32.c
11600 @@ -0,0 +1,340 @@
11601 +/*
11602 + * This file contains the PPC32 PMU register description tables
11603 + * and pmc checker used by perfmon.c.
11604 + *
11605 + * Philip Mucci, mucci@cs.utk.edu
11606 + *
11607 + * Based on code from:
11608 + * Copyright (c) 2005 David Gibson, IBM Corporation.
11609 + *
11610 + * Based on perfmon_p6.c:
11611 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
11612 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
11613 + *
11614 + * This program is free software; you can redistribute it and/or
11615 + * modify it under the terms of version 2 of the GNU General Public
11616 + * License as published by the Free Software Foundation.
11617 + *
11618 + * This program is distributed in the hope that it will be useful,
11619 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
11620 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11621 + * General Public License for more details.
11622 + *
11623 + * You should have received a copy of the GNU General Public License
11624 + * along with this program; if not, write to the Free Software
11625 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
11626 + * 02111-1307 USA
11627 + */
11628 +#include <linux/module.h>
11629 +#include <linux/perfmon_kern.h>
11630 +#include <asm/reg.h>
11631 +
11632 +MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>");
11633 +MODULE_DESCRIPTION("PPC32 PMU description table");
11634 +MODULE_LICENSE("GPL");
11635 +
11636 +static struct pfm_pmu_config pfm_ppc32_pmu_conf;
11637 +
11638 +static struct pfm_regmap_desc pfm_ppc32_pmc_desc[] = {
11639 +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", 0x0, 0, 0, SPRN_MMCR0),
11640 +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0x0, 0, 0, SPRN_MMCR1),
11641 +/* mmcr2 */ PMC_D(PFM_REG_I, "MMCR2", 0x0, 0, 0, SPRN_MMCR2),
11642 +};
11643 +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_ppc32_pmc_desc)
11644 +
11645 +static struct pfm_regmap_desc pfm_ppc32_pmd_desc[] = {
11646 +/* pmd0 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
11647 +/* pmd1 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
11648 +/* pmd2 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
11649 +/* pmd3 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
11650 +/* pmd4 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
11651 +/* pmd5 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
11652 +};
11653 +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_ppc32_pmd_desc)
11654 +
11655 +static void perfmon_perf_irq(struct pt_regs *regs)
11656 +{
11657 + u32 mmcr0;
11658 +
11659 + /* BLATANTLY STOLEN FROM OPROFILE, then modified */
11660 +
11661 + /* set the PMM bit (see comment below) */
11662 + mtmsr(mfmsr() | MSR_PMM);
11663 +
11664 + pfm_interrupt_handler(instruction_pointer(regs), regs);
11665 +
11666 + /* The freeze bit was set by the interrupt.
11667 + * Clear the freeze bit, and reenable the interrupt.
11668 + * The counters won't actually start until the rfi clears
11669 + * the PMM bit.
11670 + */
11671 +
11672 + /* Unfreezes the counters on this CPU, enables the interrupt,
11673 + * enables the counters to trigger the interrupt, and sets the
11674 + * counters to only count when the mark bit is not set.
11675 + */
11676 + mmcr0 = mfspr(SPRN_MMCR0);
11677 +
11678 + mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0);
11679 + mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
11680 +
11681 + mtspr(SPRN_MMCR0, mmcr0);
11682 +}
11683 +
11684 +static int pfm_ppc32_probe_pmu(void)
11685 +{
11686 + enum ppc32_pmu_type pm_type;
11687 + int nmmcr = 0, npmds = 0, intsok = 0, i;
11688 + unsigned int pvr;
11689 + char *str;
11690 +
11691 + pvr = mfspr(SPRN_PVR);
11692 +
11693 + switch (PVR_VER(pvr)) {
11694 + case 0x0004: /* 604 */
11695 + str = "PPC604";
11696 + pm_type = PFM_POWERPC_PMU_604;
11697 + nmmcr = 1;
11698 + npmds = 2;
11699 + break;
11700 + case 0x0009: /* 604e; */
11701 + case 0x000A: /* 604ev */
11702 + str = "PPC604e";
11703 + pm_type = PFM_POWERPC_PMU_604e;
11704 + nmmcr = 2;
11705 + npmds = 4;
11706 + break;
11707 + case 0x0008: /* 750/740 */
11708 + str = "PPC750";
11709 + pm_type = PFM_POWERPC_PMU_750;
11710 + nmmcr = 2;
11711 + npmds = 4;
11712 + break;
11713 + case 0x7000: /* 750FX */
11714 + case 0x7001:
11715 + str = "PPC750";
11716 + pm_type = PFM_POWERPC_PMU_750;
11717 + nmmcr = 2;
11718 + npmds = 4;
11719 + if ((pvr & 0xFF0F) >= 0x0203)
11720 + intsok = 1;
11721 + break;
11722 + case 0x7002: /* 750GX */
11723 + str = "PPC750";
11724 + pm_type = PFM_POWERPC_PMU_750;
11725 + nmmcr = 2; npmds = 4;
11726 + intsok = 1;
11727 + break; /* 750GX must not fall through into the 7400 setup */
11728 + case 0x000C: /* 7400 */
11729 + str = "PPC7400";
11730 + pm_type = PFM_POWERPC_PMU_7400;
11731 + nmmcr = 3;
11732 + npmds = 4;
11733 + break;
11734 + case 0x800C: /* 7410 */
11735 + str = "PPC7410";
11736 + pm_type = PFM_POWERPC_PMU_7400;
11737 + nmmcr = 3;
11738 + npmds = 4;
11739 + if ((pvr & 0xFFFF) >= 0x01103)
11740 + intsok = 1;
11741 + break;
11742 + case 0x8000: /* 7451/7441 */
11743 + case 0x8001: /* 7455/7445 */
11744 + case 0x8002: /* 7457/7447 */
11745 + case 0x8003: /* 7447A */
11746 + case 0x8004: /* 7448 */
11747 + str = "PPC7450";
11748 + pm_type = PFM_POWERPC_PMU_7450;
11749 + nmmcr = 3; npmds = 6;
11750 + intsok = 1;
11751 + break;
11752 + default:
11753 + PFM_INFO("Unknown PVR_VER(0x%x)\n", PVR_VER(pvr));
11754 + return -1;
11755 + }
11756 +
11757 + /*
11758 + * deconfigure unimplemented registers
11759 + */
11760 + for (i = npmds; i < PFM_PM_NUM_PMDS; i++)
11761 + pfm_ppc32_pmd_desc[i].type = PFM_REG_NA;
11762 +
11763 + for (i = nmmcr; i < PFM_PM_NUM_PMCS; i++)
11764 + pfm_ppc32_pmc_desc[i].type = PFM_REG_NA;
11765 +
11766 + /*
11767 + * update PMU description structure
11768 + */
11769 + pfm_ppc32_pmu_conf.pmu_name = str;
11770 + pfm_ppc32_pmu_info.pmu_style = pm_type;
11771 + pfm_ppc32_pmu_conf.num_pmc_entries = nmmcr;
11772 + pfm_ppc32_pmu_conf.num_pmd_entries = npmds;
11773 +
11774 + if (intsok == 0)
11775 + PFM_INFO("Interrupts unlikely to work\n");
11776 +
11777 + return reserve_pmc_hardware(perfmon_perf_irq);
11778 +}
11779 +
11780 +static void pfm_ppc32_write_pmc(unsigned int cnum, u64 value)
11781 +{
11782 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
11783 + case SPRN_MMCR0:
11784 + mtspr(SPRN_MMCR0, value);
11785 + break;
11786 + case SPRN_MMCR1:
11787 + mtspr(SPRN_MMCR1, value);
11788 + break;
11789 + case SPRN_MMCR2:
11790 + mtspr(SPRN_MMCR2, value);
11791 + break;
11792 + default:
11793 + BUG();
11794 + }
11795 +}
11796 +
11797 +static void pfm_ppc32_write_pmd(unsigned int cnum, u64 value)
11798 +{
11799 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11800 + case SPRN_PMC1:
11801 + mtspr(SPRN_PMC1, value);
11802 + break;
11803 + case SPRN_PMC2:
11804 + mtspr(SPRN_PMC2, value);
11805 + break;
11806 + case SPRN_PMC3:
11807 + mtspr(SPRN_PMC3, value);
11808 + break;
11809 + case SPRN_PMC4:
11810 + mtspr(SPRN_PMC4, value);
11811 + break;
11812 + case SPRN_PMC5:
11813 + mtspr(SPRN_PMC5, value);
11814 + break;
11815 + case SPRN_PMC6:
11816 + mtspr(SPRN_PMC6, value);
11817 + break;
11818 + default:
11819 + BUG();
11820 + }
11821 +}
11822 +
11823 +static u64 pfm_ppc32_read_pmd(unsigned int cnum)
11824 +{
11825 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
11826 + case SPRN_PMC1:
11827 + return mfspr(SPRN_PMC1);
11828 + case SPRN_PMC2:
11829 + return mfspr(SPRN_PMC2);
11830 + case SPRN_PMC3:
11831 + return mfspr(SPRN_PMC3);
11832 + case SPRN_PMC4:
11833 + return mfspr(SPRN_PMC4);
11834 + case SPRN_PMC5:
11835 + return mfspr(SPRN_PMC5);
11836 + case SPRN_PMC6:
11837 + return mfspr(SPRN_PMC6);
11838 + default:
11839 + BUG();
11840 + }
11841 +}
11842 +
11843 +/**
11844 + * pfm_ppc32_enable_counters
11845 + *
11846 + * Just need to load the current values into the control registers.
11847 + **/
11848 +static void pfm_ppc32_enable_counters(struct pfm_context *ctx,
11849 + struct pfm_event_set *set)
11850 +{
11851 + unsigned int i, max_pmc;
11852 +
11853 + max_pmc = pfm_pmu_conf->regs.max_pmc;
11854 +
11855 + for (i = 0; i < max_pmc; i++)
11856 + if (test_bit(i, set->used_pmcs))
11857 + pfm_ppc32_write_pmc(i, set->pmcs[i]);
11858 +}
11859 +
11860 +/**
11861 + * pfm_ppc32_disable_counters
11862 + *
11863 + * Zero every control register (PMC) marked in set->used_pmcs.
11864 + **/
11865 +static void pfm_ppc32_disable_counters(struct pfm_context *ctx,
11866 + struct pfm_event_set *set)
11867 +{
11868 + unsigned int i, max;
11869 +
11870 + max = pfm_pmu_conf->regs.max_pmc;
11871 +
11872 + for (i = 0; i < max; i++)
11873 + if (test_bit(i, set->used_pmcs))
11874 + pfm_ppc32_write_pmc(i, 0); /* index by PMC number, not ctx */
11875 +}
11876 +
11877 +/**
11878 + * pfm_ppc32_get_ovfl_pmds
11879 + *
11880 + * Flag every used counter whose overflow bit (bit counter_width) is set in
11881 + * set->povfl_pmds and bump set->npend_ovfls for each one found.
11882 + **/
11883 +static void pfm_ppc32_get_ovfl_pmds(struct pfm_context *ctx,
11884 + struct pfm_event_set *set)
11885 +{
11886 + unsigned int i;
11887 + unsigned int max_pmd = pfm_pmu_conf->regs.max_cnt_pmd;
11888 + u64 *used_pmds = set->used_pmds;
11889 + u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds;
11890 + u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* 64-bit shift */
11891 + u64 new_val, mask[PFM_PMD_BV];
11892 +
11893 + bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
11894 + cast_ulp(used_pmds), max_pmd);
11895 +
11896 + for (i = 0; i < max_pmd; i++) {
11897 + if (test_bit(i, cast_ulp(mask))) {
11898 + new_val = pfm_ppc32_read_pmd(i);
11899 + if (new_val & width_mask) {
11900 + set_bit(i, set->povfl_pmds);
11901 + set->npend_ovfls++;
11902 + }
11903 + }
11904 + }
11905 +}
11906 +
11907 +struct pfm_arch_pmu_info pfm_ppc32_pmu_info = {
11908 + .pmu_style = PFM_POWERPC_PMU_NONE,
11909 + .write_pmc = pfm_ppc32_write_pmc,
11910 + .write_pmd = pfm_ppc32_write_pmd,
11911 + .read_pmd = pfm_ppc32_read_pmd,
11912 + .get_ovfl_pmds = pfm_ppc32_get_ovfl_pmds,
11913 + .enable_counters = pfm_ppc32_enable_counters,
11914 + .disable_counters = pfm_ppc32_disable_counters,
11915 +};
11916 +
11917 +static struct pfm_pmu_config pfm_ppc32_pmu_conf = {
11918 + .counter_width = 31,
11919 + .pmd_desc = pfm_ppc32_pmd_desc,
11920 + .pmc_desc = pfm_ppc32_pmc_desc,
11921 + .probe_pmu = pfm_ppc32_probe_pmu,
11922 + .flags = PFM_PMU_BUILTIN_FLAG,
11923 + .owner = THIS_MODULE,
11924 + .version = "0.1",
11925 + .arch_info = &pfm_ppc32_pmu_info,
11926 +};
11927 +
11928 +static int __init pfm_ppc32_pmu_init_module(void)
11929 +{
11930 + return pfm_pmu_register(&pfm_ppc32_pmu_conf);
11931 +}
11932 +
11933 +static void __exit pfm_ppc32_pmu_cleanup_module(void)
11934 +{
11935 + release_pmc_hardware();
11936 + pfm_pmu_unregister(&pfm_ppc32_pmu_conf);
11937 +}
11938 +
11939 +module_init(pfm_ppc32_pmu_init_module);
11940 +module_exit(pfm_ppc32_pmu_cleanup_module);
11941 diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
11942 index dbc338f..e24320e 100644
11943 --- a/arch/powerpc/platforms/cell/cbe_regs.c
11944 +++ b/arch/powerpc/platforms/cell/cbe_regs.c
11945 @@ -33,6 +33,7 @@ static struct cbe_regs_map
11946 struct cbe_iic_regs __iomem *iic_regs;
11947 struct cbe_mic_tm_regs __iomem *mic_tm_regs;
11948 struct cbe_pmd_shadow_regs pmd_shadow_regs;
11949 + struct cbe_ppe_priv_regs __iomem *ppe_priv_regs;
11950 } cbe_regs_maps[MAX_CBE];
11951 static int cbe_regs_map_count;
11952
11953 @@ -145,6 +146,23 @@ struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
11954 }
11955 EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
11956
11957 +struct cbe_ppe_priv_regs __iomem *cbe_get_ppe_priv_regs(struct device_node *np)
11958 +{
11959 + struct cbe_regs_map *map = cbe_find_map(np);
11960 + if (map == NULL)
11961 + return NULL;
11962 + return map->ppe_priv_regs;
11963 +}
11964 +
11965 +struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu)
11966 +{
11967 + struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
11968 + if (map == NULL)
11969 + return NULL;
11970 + return map->ppe_priv_regs;
11971 +}
11972 +EXPORT_SYMBOL_GPL(cbe_get_cpu_ppe_priv_regs);
11973 +
11974 u32 cbe_get_hw_thread_id(int cpu)
11975 {
11976 return cbe_thread_map[cpu].thread_id;
11977 @@ -206,6 +224,11 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map)
11978 for_each_node_by_type(np, "mic-tm")
11979 if (of_get_parent(np) == be)
11980 map->mic_tm_regs = of_iomap(np, 0);
11981 +
11982 + for_each_node_by_type(np, "ppe-mmio")
11983 + if (of_get_parent(np) == be)
11984 + map->ppe_priv_regs = of_iomap(np, 0);
11985 +
11986 } else {
11987 struct device_node *cpu;
11988 /* That hack must die die die ! */
11989 @@ -227,6 +250,10 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map)
11990 prop = of_get_property(cpu, "mic-tm", NULL);
11991 if (prop != NULL)
11992 map->mic_tm_regs = ioremap(prop->address, prop->len);
11993 +
11994 + prop = of_get_property(cpu, "ppe-mmio", NULL);
11995 + if (prop != NULL)
11996 + map->ppe_priv_regs = ioremap(prop->address, prop->len);
11997 }
11998 }
11999
12000 diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
12001 index 109ae24..bafe5a6 100644
12002 --- a/arch/sparc/include/asm/hypervisor.h
12003 +++ b/arch/sparc/include/asm/hypervisor.h
12004 @@ -2713,6 +2713,30 @@ extern unsigned long sun4v_ldc_revoke(unsigned long channel,
12005 */
12006 #define HV_FAST_SET_PERFREG 0x101
12007
12008 +#define HV_N2_PERF_SPARC_CTL 0x0
12009 +#define HV_N2_PERF_DRAM_CTL0 0x1
12010 +#define HV_N2_PERF_DRAM_CNT0 0x2
12011 +#define HV_N2_PERF_DRAM_CTL1 0x3
12012 +#define HV_N2_PERF_DRAM_CNT1 0x4
12013 +#define HV_N2_PERF_DRAM_CTL2 0x5
12014 +#define HV_N2_PERF_DRAM_CNT2 0x6
12015 +#define HV_N2_PERF_DRAM_CTL3 0x7
12016 +#define HV_N2_PERF_DRAM_CNT3 0x8
12017 +
12018 +#define HV_FAST_N2_GET_PERFREG 0x104
12019 +#define HV_FAST_N2_SET_PERFREG 0x105
12020 +
12021 +#ifndef __ASSEMBLY__
12022 +extern unsigned long sun4v_niagara_getperf(unsigned long reg,
12023 + unsigned long *val);
12024 +extern unsigned long sun4v_niagara_setperf(unsigned long reg,
12025 + unsigned long val);
12026 +extern unsigned long sun4v_niagara2_getperf(unsigned long reg,
12027 + unsigned long *val);
12028 +extern unsigned long sun4v_niagara2_setperf(unsigned long reg,
12029 + unsigned long val);
12030 +#endif
12031 +
12032 /* MMU statistics services.
12033 *
12034 * The hypervisor maintains MMU statistics and privileged code provides
12035 diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
12036 index e3dd930..6cf3aec 100644
12037 --- a/arch/sparc/include/asm/irq_64.h
12038 +++ b/arch/sparc/include/asm/irq_64.h
12039 @@ -67,6 +67,9 @@ extern void virt_irq_free(unsigned int virt_irq);
12040 extern void __init init_IRQ(void);
12041 extern void fixup_irqs(void);
12042
12043 +extern int register_perfctr_intr(void (*handler)(struct pt_regs *));
12044 +extern void release_perfctr_intr(void (*handler)(struct pt_regs *));
12045 +
12046 static inline void set_softint(unsigned long bits)
12047 {
12048 __asm__ __volatile__("wr %0, 0x0, %%set_softint"
12049 diff --git a/arch/sparc/include/asm/perfmon.h b/arch/sparc/include/asm/perfmon.h
12050 new file mode 100644
12051 index 0000000..f20cbfa
12052 --- /dev/null
12053 +++ b/arch/sparc/include/asm/perfmon.h
12054 @@ -0,0 +1,11 @@
12055 +#ifndef _SPARC64_PERFMON_H_
12056 +#define _SPARC64_PERFMON_H_
12057 +
12058 +/*
12059 + * arch-specific user visible interface definitions
12060 + */
12061 +
12062 +#define PFM_ARCH_MAX_PMCS 2
12063 +#define PFM_ARCH_MAX_PMDS 3
12064 +
12065 +#endif /* _SPARC64_PERFMON_H_ */
12066 diff --git a/arch/sparc/include/asm/perfmon_kern.h b/arch/sparc/include/asm/perfmon_kern.h
12067 new file mode 100644
12068 index 0000000..033eff5
12069 --- /dev/null
12070 +++ b/arch/sparc/include/asm/perfmon_kern.h
12071 @@ -0,0 +1,286 @@
12072 +#ifndef _SPARC64_PERFMON_KERN_H_
12073 +#define _SPARC64_PERFMON_KERN_H_
12074 +
12075 +#ifdef __KERNEL__
12076 +
12077 +#ifdef CONFIG_PERFMON
12078 +
12079 +#include <linux/irq.h>
12080 +#include <asm/system.h>
12081 +
12082 +#define PFM_ARCH_PMD_STK_ARG 2
12083 +#define PFM_ARCH_PMC_STK_ARG 1
12084 +
12085 +struct pfm_arch_pmu_info {
12086 + u32 pmu_style;
12087 +};
12088 +
12089 +static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
12090 +{
12091 +}
12092 +
12093 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
12094 + struct pfm_event_set *set)
12095 +{}
12096 +
12097 +static inline void pfm_arch_serialize(void)
12098 +{
12099 +}
12100 +
12101 +/*
12102 + * SPARC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
12103 + * this routine needs to do it when switching sets on overflow
12104 + */
12105 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
12106 + struct pfm_event_set *set)
12107 +{
12108 + pfm_save_pmds(ctx, set);
12109 +}
12110 +
12111 +extern void pfm_arch_write_pmc(struct pfm_context *ctx,
12112 + unsigned int cnum, u64 value);
12113 +extern u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum);
12114 +
12115 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
12116 + unsigned int cnum, u64 value)
12117 +{
12118 + u64 pic;
12119 +
12120 + value &= pfm_pmu_conf->ovfl_mask;
12121 +
12122 + read_pic(pic);
12123 +
12124 + switch (cnum) {
12125 + case 0:
12126 + pic = (pic & 0xffffffff00000000UL) |
12127 + (value & 0xffffffffUL);
12128 + break;
12129 + case 1:
12130 + pic = (pic & 0xffffffffUL) |
12131 + (value << 32UL);
12132 + break;
12133 + default:
12134 + BUG();
12135 + }
12136 +
12137 + write_pic(pic);
12138 +}
12139 +
12140 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx,
12141 + unsigned int cnum)
12142 +{
12143 + u64 pic;
12144 +
12145 + read_pic(pic);
12146 +
12147 + switch (cnum) {
12148 + case 0:
12149 + return pic & 0xffffffffUL;
12150 + case 1:
12151 + return pic >> 32UL;
12152 + default:
12153 + BUG();
12154 + return 0;
12155 + }
12156 +}
12157 +
12158 +/*
12159 + * For some CPUs, the upper bits of a counter must be set in order for the
12160 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
12161 + * and the upper bits are cleared. This function may be used to set them back.
12162 + */
12163 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
12164 + unsigned int cnum)
12165 +{
12166 + u64 val = pfm_arch_read_pmd(ctx, cnum);
12167 +
12168 + /* This masks out overflow bit 31 */
12169 + pfm_arch_write_pmd(ctx, cnum, val);
12170 +}
12171 +
12172 +/*
12173 + * At certain points, perfmon needs to know if monitoring has been
12174 + * explicitly started/stopped by user via pfm_start/pfm_stop. The
12175 + * information is tracked in ctx.flags.started. However on certain
12176 + * architectures, it may be possible to start/stop directly from
12177 + * user level with a single assembly instruction bypassing
12178 + * the kernel. This function must be used to determine by
12179 + * an arch-specific means if monitoring is actually started/stopped.
12180 + */
12181 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
12182 +{
12183 + return ctx->flags.started;
12184 +}
12185 +
12186 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
12187 + struct pfm_context *ctx)
12188 +{
12189 +}
12190 +
12191 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
12192 + struct pfm_context *ctx)
12193 +{
12194 +}
12195 +
12196 +static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
12197 + struct pfm_context *ctx)
12198 +{
12199 +}
12200 +
12201 +int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
12202 +int pfm_arch_ctxswout_thread(struct task_struct *task,
12203 + struct pfm_context *ctx);
12204 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
12205 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
12206 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
12207 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
12208 +char *pfm_arch_get_pmu_module_name(void);
12209 +
12210 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
12211 + struct pfm_event_set *set)
12212 +{
12213 + pfm_arch_stop(current, ctx);
12214 + /*
12215 + * we mark monitoring as stopped to avoid
12216 + * certain side effects especially in
12217 + * pfm_switch_sets_from_intr() on
12218 + * pfm_arch_restore_pmcs()
12219 + */
12220 + ctx->flags.started = 0;
12221 +}
12222 +
12223 +/*
12224 + * unfreeze PMU from pfm_do_interrupt_handler()
12225 + * ctx may be NULL for spurious
12226 + */
12227 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
12228 +{
12229 + if (!ctx)
12230 + return;
12231 +
12232 + PFM_DBG_ovfl("state=%d", ctx->state);
12233 +
12234 + ctx->flags.started = 1;
12235 +
12236 + if (ctx->state == PFM_CTX_MASKED)
12237 + return;
12238 +
12239 + pfm_arch_restore_pmcs(ctx, ctx->active_set);
12240 +}
12241 +
12242 +/*
12243 + * this function is called from the PMU interrupt handler ONLY.
12244 + * On SPARC, the PMU is frozen via arch_stop, masking would be implemented
12245 + * via arch-stop as well. Given that the PMU is already stopped when
12246 + * entering the interrupt handler, we do not need to stop it again, so
12247 + * this function is a nop.
12248 + */
12249 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
12250 + struct pfm_event_set *set)
12251 +{
12252 +}
12253 +
12254 +/*
12255 + * on SPARC masking/unmasking uses the start/stop mechanism, so we simply
12256 + * need to start here.
12257 + */
12258 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
12259 + struct pfm_event_set *set)
12260 +{
12261 + pfm_arch_start(current, ctx);
12262 +}
12263 +
12264 +static inline void pfm_arch_pmu_config_remove(void)
12265 +{
12266 +}
12267 +
12268 +static inline int pfm_arch_context_create(struct pfm_context *ctx,
12269 + u32 ctx_flags)
12270 +{
12271 + return 0;
12272 +}
12273 +
12274 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
12275 +{
12276 +}
12277 +
12278 +/*
12279 + * function called from pfm_setfl_sane(). Context is locked
12280 + * and interrupts are masked.
12281 + * The value of flags is the value of ctx_flags as passed by
12282 + * user.
12283 + *
12284 + * function must check arch-specific set flags.
12285 + * Return:
12286 + * 1 when flags are valid
12287 + * 0 on error
12288 + */
12289 +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
12290 +{
12291 + return 0;
12292 +}
12293 +
12294 +static inline int pfm_arch_init(void)
12295 +{
12296 + return 0;
12297 +}
12298 +
12299 +static inline void pfm_arch_init_percpu(void)
12300 +{
12301 +}
12302 +
12303 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
12304 +{
12305 + return 0;
12306 +}
12307 +
12308 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
12309 +{}
12310 +
12311 +extern void perfmon_interrupt(struct pt_regs *);
12312 +
12313 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
12314 +{
12315 + return register_perfctr_intr(perfmon_interrupt);
12316 +}
12317 +
12318 +static inline void pfm_arch_pmu_release(void)
12319 +{
12320 + release_perfctr_intr(perfmon_interrupt);
12321 +}
12322 +
12323 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
12324 +{}
12325 +
12326 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
12327 +{}
12328 +
12329 +static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
12330 +{
12331 + return 0;
12332 +}
12333 +
12334 +static inline int pfm_arch_get_base_syscall(void)
12335 +{
12336 + return __NR_pfm_create_context;
12337 +}
12338 +
12339 +struct pfm_arch_context {
12340 + /* empty */
12341 +};
12342 +
12343 +#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
12344 +/*
12345 + * SPARC needs extra alignment for the sampling buffer
12346 + */
12347 +#define PFM_ARCH_SMPL_ALIGN_SIZE (16 * 1024)
12348 +
12349 +static inline void pfm_cacheflush(void *addr, unsigned int len)
12350 +{
12351 +}
12352 +
12353 +#endif /* CONFIG_PERFMON */
12354 +
12355 +#endif /* __KERNEL__ */
12356 +
12357 +#endif /* _SPARC64_PERFMON_KERN_H_ */
12358 diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
12359 index db9e742..2a9ddb9 100644
12360 --- a/arch/sparc/include/asm/system_64.h
12361 +++ b/arch/sparc/include/asm/system_64.h
12362 @@ -30,6 +30,9 @@ enum sparc_cpu {
12363 #define ARCH_SUN4C_SUN4 0
12364 #define ARCH_SUN4 0
12365
12366 +extern char *sparc_cpu_type;
12367 +extern char *sparc_fpu_type;
12368 +extern char *sparc_pmu_type;
12369 extern char reboot_command[];
12370
12371 /* These are here in an effort to more fully work around Spitfire Errata
12372 @@ -104,15 +107,13 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
12373 #define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p))
12374 #define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p))
12375
12376 -/* Blackbird errata workaround. See commentary in
12377 - * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
12378 - * for more information.
12379 - */
12380 -#define reset_pic() \
12381 - __asm__ __volatile__("ba,pt %xcc, 99f\n\t" \
12382 +/* Blackbird errata workaround. */
12383 +#define write_pic(val) \
12384 + __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \
12385 ".align 64\n" \
12386 - "99:wr %g0, 0x0, %pic\n\t" \
12387 - "rd %pic, %g0")
12388 + "99:wr %0, 0x0, %%pic\n\t" \
12389 + "rd %%pic, %%g0" : : "r" (val))
12390 +#define reset_pic() write_pic(0)
12391
12392 #ifndef __ASSEMBLY__
12393
12394 @@ -145,14 +146,10 @@ do { \
12395 * and 2 stores in this critical code path. -DaveM
12396 */
12397 #define switch_to(prev, next, last) \
12398 -do { if (test_thread_flag(TIF_PERFCTR)) { \
12399 - unsigned long __tmp; \
12400 - read_pcr(__tmp); \
12401 - current_thread_info()->pcr_reg = __tmp; \
12402 - read_pic(__tmp); \
12403 - current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\
12404 - current_thread_info()->kernel_cntd1 += ((__tmp) >> 32); \
12405 - } \
12406 +do { if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
12407 + pfm_ctxsw_out(prev, next); \
12408 + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
12409 + pfm_ctxsw_in(prev, next); \
12410 flush_tlb_pending(); \
12411 save_and_clear_fpu(); \
12412 /* If you are tempted to conditionalize the following */ \
12413 @@ -197,11 +194,6 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
12414 "l1", "l2", "l3", "l4", "l5", "l6", "l7", \
12415 "i0", "i1", "i2", "i3", "i4", "i5", \
12416 "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \
12417 - /* If you fuck with this, update ret_from_syscall code too. */ \
12418 - if (test_thread_flag(TIF_PERFCTR)) { \
12419 - write_pcr(current_thread_info()->pcr_reg); \
12420 - reset_pic(); \
12421 - } \
12422 } while(0)
12423
12424 static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
12425 diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
12426 index c0a737d..53857f7 100644
12427 --- a/arch/sparc/include/asm/thread_info_64.h
12428 +++ b/arch/sparc/include/asm/thread_info_64.h
12429 @@ -58,11 +58,6 @@ struct thread_info {
12430 unsigned long gsr[7];
12431 unsigned long xfsr[7];
12432
12433 - __u64 __user *user_cntd0;
12434 - __u64 __user *user_cntd1;
12435 - __u64 kernel_cntd0, kernel_cntd1;
12436 - __u64 pcr_reg;
12437 -
12438 struct restart_block restart_block;
12439
12440 struct pt_regs *kern_una_regs;
12441 @@ -96,15 +91,10 @@ struct thread_info {
12442 #define TI_RWIN_SPTRS 0x000003c8
12443 #define TI_GSR 0x00000400
12444 #define TI_XFSR 0x00000438
12445 -#define TI_USER_CNTD0 0x00000470
12446 -#define TI_USER_CNTD1 0x00000478
12447 -#define TI_KERN_CNTD0 0x00000480
12448 -#define TI_KERN_CNTD1 0x00000488
12449 -#define TI_PCR 0x00000490
12450 -#define TI_RESTART_BLOCK 0x00000498
12451 -#define TI_KUNA_REGS 0x000004c0
12452 -#define TI_KUNA_INSN 0x000004c8
12453 -#define TI_FPREGS 0x00000500
12454 +#define TI_RESTART_BLOCK 0x00000470
12455 +#define TI_KUNA_REGS 0x00000498
12456 +#define TI_KUNA_INSN 0x000004a0
12457 +#define TI_FPREGS 0x000004c0
12458
12459 /* We embed this in the uppermost byte of thread_info->flags */
12460 #define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */
12461 @@ -222,11 +212,11 @@ register struct thread_info *current_thread_info_reg asm("g6");
12462 #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
12463 #define TIF_SIGPENDING 2 /* signal pending */
12464 #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
12465 -#define TIF_PERFCTR 4 /* performance counters active */
12466 +/* Bit 4 is available */
12467 #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */
12468 /* flag bit 6 is available */
12469 #define TIF_32BIT 7 /* 32-bit binary */
12470 -/* flag bit 8 is available */
12471 +#define TIF_PERFMON_WORK 8 /* work for pfm_handle_work() */
12472 #define TIF_SECCOMP 9 /* secure computing */
12473 #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */
12474 /* flag bit 11 is available */
12475 @@ -237,22 +227,24 @@ register struct thread_info *current_thread_info_reg asm("g6");
12476 #define TIF_ABI_PENDING 12
12477 #define TIF_MEMDIE 13
12478 #define TIF_POLLING_NRFLAG 14
12479 +#define TIF_PERFMON_CTXSW 15 /* perfmon needs ctxsw calls */
12480
12481 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
12482 #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
12483 #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
12484 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
12485 -#define _TIF_PERFCTR (1<<TIF_PERFCTR)
12486 #define _TIF_UNALIGNED (1<<TIF_UNALIGNED)
12487 #define _TIF_32BIT (1<<TIF_32BIT)
12488 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
12489 #define _TIF_SECCOMP (1<<TIF_SECCOMP)
12490 #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
12491 #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
12492 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
12493 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
12494
12495 #define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
12496 _TIF_DO_NOTIFY_RESUME_MASK | \
12497 - _TIF_NEED_RESCHED | _TIF_PERFCTR)
12498 + _TIF_NEED_RESCHED)
12499 #define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING)
12500
12501 /*
12502 diff --git a/arch/sparc/include/asm/unistd_32.h b/arch/sparc/include/asm/unistd_32.h
12503 index 648643a..efe4d86 100644
12504 --- a/arch/sparc/include/asm/unistd_32.h
12505 +++ b/arch/sparc/include/asm/unistd_32.h
12506 @@ -338,8 +338,20 @@
12507 #define __NR_dup3 320
12508 #define __NR_pipe2 321
12509 #define __NR_inotify_init1 322
12510 +#define __NR_pfm_create_context 323
12511 +#define __NR_pfm_write_pmcs 324
12512 +#define __NR_pfm_write_pmds 325
12513 +#define __NR_pfm_read_pmds 326
12514 +#define __NR_pfm_load_context 327
12515 +#define __NR_pfm_start 328
12516 +#define __NR_pfm_stop 329
12517 +#define __NR_pfm_restart 330
12518 +#define __NR_pfm_create_evtsets 331
12519 +#define __NR_pfm_getinfo_evtsets 332
12520 +#define __NR_pfm_delete_evtsets 333
12521 +#define __NR_pfm_unload_context 334
12522
12523 -#define NR_SYSCALLS 323
12524 +#define NR_SYSCALLS 335
12525
12526 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
12527 * it never had the plain ones and there is no value to adding those
12528 diff --git a/arch/sparc/include/asm/unistd_64.h b/arch/sparc/include/asm/unistd_64.h
12529 index c5cc0e0..cbbb0b5 100644
12530 --- a/arch/sparc/include/asm/unistd_64.h
12531 +++ b/arch/sparc/include/asm/unistd_64.h
12532 @@ -340,8 +340,20 @@
12533 #define __NR_dup3 320
12534 #define __NR_pipe2 321
12535 #define __NR_inotify_init1 322
12536 +#define __NR_pfm_create_context 323
12537 +#define __NR_pfm_write_pmcs 324
12538 +#define __NR_pfm_write_pmds 325
12539 +#define __NR_pfm_read_pmds 326
12540 +#define __NR_pfm_load_context 327
12541 +#define __NR_pfm_start 328
12542 +#define __NR_pfm_stop 329
12543 +#define __NR_pfm_restart 330
12544 +#define __NR_pfm_create_evtsets 331
12545 +#define __NR_pfm_getinfo_evtsets 332
12546 +#define __NR_pfm_delete_evtsets 333
12547 +#define __NR_pfm_unload_context 334
12548
12549 -#define NR_SYSCALLS 323
12550 +#define NR_SYSCALLS 335
12551
12552 #ifdef __KERNEL__
12553 #define __ARCH_WANT_IPC_PARSE_VERSION
12554 diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
12555 index e1b9233..727e4e7 100644
12556 --- a/arch/sparc/kernel/systbls.S
12557 +++ b/arch/sparc/kernel/systbls.S
12558 @@ -81,4 +81,6 @@ sys_call_table:
12559 /*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
12560 /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
12561 /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
12562 -/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1
12563 +/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
12564 +/*325*/ .long sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
12565 +/*330*/ .long sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
12566 diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
12567 index 36b4b7a..5555d1e 100644
12568 --- a/arch/sparc64/Kconfig
12569 +++ b/arch/sparc64/Kconfig
12570 @@ -401,6 +401,8 @@ source "drivers/sbus/char/Kconfig"
12571
12572 source "fs/Kconfig"
12573
12574 +source "arch/sparc64/perfmon/Kconfig"
12575 +
12576 source "arch/sparc64/Kconfig.debug"
12577
12578 source "security/Kconfig"
12579 diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile
12580 index b785a39..646731c 100644
12581 --- a/arch/sparc64/Makefile
12582 +++ b/arch/sparc64/Makefile
12583 @@ -32,6 +32,8 @@ core-y += arch/sparc64/math-emu/
12584 libs-y += arch/sparc64/prom/ arch/sparc64/lib/
12585 drivers-$(CONFIG_OPROFILE) += arch/sparc64/oprofile/
12586
12587 +core-$(CONFIG_PERFMON) += arch/sparc64/perfmon/
12588 +
12589 boot := arch/sparc64/boot
12590
12591 image tftpboot.img vmlinux.aout: vmlinux
12592 diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c
12593 index 0097c08..f839f84 100644
12594 --- a/arch/sparc64/kernel/cpu.c
12595 +++ b/arch/sparc64/kernel/cpu.c
12596 @@ -20,16 +20,17 @@
12597 DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
12598
12599 struct cpu_iu_info {
12600 - short manuf;
12601 - short impl;
12602 - char* cpu_name; /* should be enough I hope... */
12603 + short manuf;
12604 + short impl;
12605 + char *cpu_name;
12606 + char *pmu_name;
12607 };
12608
12609 struct cpu_fp_info {
12610 - short manuf;
12611 - short impl;
12612 - char fpu_vers;
12613 - char* fp_name;
12614 + short manuf;
12615 + short impl;
12616 + char fpu_vers;
12617 + char* fp_name;
12618 };
12619
12620 static struct cpu_fp_info linux_sparc_fpu[] = {
12621 @@ -49,23 +50,24 @@ static struct cpu_fp_info linux_sparc_fpu[] = {
12622 #define NSPARCFPU ARRAY_SIZE(linux_sparc_fpu)
12623
12624 static struct cpu_iu_info linux_sparc_chips[] = {
12625 - { 0x17, 0x10, "TI UltraSparc I (SpitFire)"},
12626 - { 0x22, 0x10, "TI UltraSparc I (SpitFire)"},
12627 - { 0x17, 0x11, "TI UltraSparc II (BlackBird)"},
12628 - { 0x17, 0x12, "TI UltraSparc IIi (Sabre)"},
12629 - { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)"},
12630 - { 0x3e, 0x14, "TI UltraSparc III (Cheetah)"},
12631 - { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)"},
12632 - { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)"},
12633 - { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)"},
12634 - { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)"},
12635 - { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)"},
12636 -};
12637 + { 0x17, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"},
12638 + { 0x22, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"},
12639 + { 0x17, 0x11, "TI UltraSparc II (BlackBird)", "ultra12"},
12640 + { 0x17, 0x12, "TI UltraSparc IIi (Sabre)", "ultra12"},
12641 + { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"},
12642 + { 0x3e, 0x14, "TI UltraSparc III (Cheetah)", "ultra3"},
12643 + { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"},
12644 + { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"},
12645 + { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)", "ultra4"},
12646 + { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"},
12647 + { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3+"},
12648 + };
12649
12650 #define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
12651
12652 char *sparc_cpu_type;
12653 char *sparc_fpu_type;
12654 +char *sparc_pmu_type;
12655
12656 static void __init sun4v_cpu_probe(void)
12657 {
12658 @@ -73,11 +75,13 @@ static void __init sun4v_cpu_probe(void)
12659 case SUN4V_CHIP_NIAGARA1:
12660 sparc_cpu_type = "UltraSparc T1 (Niagara)";
12661 sparc_fpu_type = "UltraSparc T1 integrated FPU";
12662 + sparc_pmu_type = "niagara";
12663 break;
12664
12665 case SUN4V_CHIP_NIAGARA2:
12666 sparc_cpu_type = "UltraSparc T2 (Niagara2)";
12667 sparc_fpu_type = "UltraSparc T2 integrated FPU";
12668 + sparc_pmu_type = "niagara2";
12669 break;
12670
12671 default:
12672 @@ -85,6 +89,7 @@ static void __init sun4v_cpu_probe(void)
12673 prom_cpu_compatible);
12674 sparc_cpu_type = "Unknown SUN4V CPU";
12675 sparc_fpu_type = "Unknown SUN4V FPU";
12676 + sparc_pmu_type = "Unknown SUN4V PMU";
12677 break;
12678 }
12679 }
12680 @@ -117,6 +122,8 @@ retry:
12681 if (linux_sparc_chips[i].impl == impl) {
12682 sparc_cpu_type =
12683 linux_sparc_chips[i].cpu_name;
12684 + sparc_pmu_type =
12685 + linux_sparc_chips[i].pmu_name;
12686 break;
12687 }
12688 }
12689 @@ -134,7 +141,8 @@ retry:
12690 printk("DEBUG: manuf[%lx] impl[%lx]\n",
12691 manuf, impl);
12692 }
12693 sparc_cpu_type = "Unknown CPU";
12694 + sparc_pmu_type = "Unknown PMU";
12695 }
12696
12697 for (i = 0; i < NSPARCFPU; i++) {
12698 diff --git a/arch/sparc64/kernel/hvcalls.S b/arch/sparc64/kernel/hvcalls.S
12699 index a2810f3..b9f508c 100644
12700 --- a/arch/sparc64/kernel/hvcalls.S
12701 +++ b/arch/sparc64/kernel/hvcalls.S
12702 @@ -884,3 +884,44 @@ sun4v_mmu_demap_all:
12703 retl
12704 nop
12705 .size sun4v_mmu_demap_all, .-sun4v_mmu_demap_all
12706 +
12707 + .globl sun4v_niagara_getperf
12708 + .type sun4v_niagara_getperf,#function
12709 +sun4v_niagara_getperf:
12710 + mov %o0, %o4
12711 + mov HV_FAST_GET_PERFREG, %o5
12712 + ta HV_FAST_TRAP
12713 + stx %o1, [%o4]
12714 + retl
12715 + nop
12716 + .size sun4v_niagara_getperf, .-sun4v_niagara_getperf
12717 +
12718 + .globl sun4v_niagara_setperf
12719 + .type sun4v_niagara_setperf,#function
12720 +sun4v_niagara_setperf:
12721 + mov HV_FAST_SET_PERFREG, %o5
12722 + ta HV_FAST_TRAP
12723 + retl
12724 + nop
12725 + .size sun4v_niagara_setperf, .-sun4v_niagara_setperf
12726 +
12727 + .globl sun4v_niagara2_getperf
12728 + .type sun4v_niagara2_getperf,#function
12729 +sun4v_niagara2_getperf:
12730 + mov %o0, %o4
12731 + mov HV_FAST_N2_GET_PERFREG, %o5
12732 + ta HV_FAST_TRAP
12733 + stx %o1, [%o4]
12734 + retl
12735 + nop
12736 + .size sun4v_niagara2_getperf, .-sun4v_niagara2_getperf
12737 +
12738 + .globl sun4v_niagara2_setperf
12739 + .type sun4v_niagara2_setperf,#function
12740 +sun4v_niagara2_setperf:
12741 + mov HV_FAST_N2_SET_PERFREG, %o5
12742 + ta HV_FAST_TRAP
12743 + retl
12744 + nop
12745 + .size sun4v_niagara2_setperf, .-sun4v_niagara2_setperf
12746 +
12747 diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
12748 index 7495bc7..e2bcca5 100644
12749 --- a/arch/sparc64/kernel/irq.c
12750 +++ b/arch/sparc64/kernel/irq.c
12751 @@ -749,6 +749,20 @@ void handler_irq(int irq, struct pt_regs *regs)
12752 irq_exit();
12753 set_irq_regs(old_regs);
12754 }
12755 +static void unhandled_perf_irq(struct pt_regs *regs)
12756 +{
12757 + unsigned long pcr, pic;
12758 +
12759 + read_pcr(pcr);
12760 + read_pic(pic);
12761 +
12762 + write_pcr(0);
12763 +
12764 + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
12765 + smp_processor_id());
12766 + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
12767 + smp_processor_id(), pcr, pic);
12768 +}
12769
12770 void do_softirq(void)
12771 {
12772 @@ -776,6 +790,55 @@ void do_softirq(void)
12773 local_irq_restore(flags);
12774 }
12775
12776 +/* Almost a direct copy of the powerpc PMC code. */
12777 +static DEFINE_SPINLOCK(perf_irq_lock);
12778 +static void *perf_irq_owner_caller; /* mostly for debugging */
12779 +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
12780 +
12781 +/* Invoked from level 15 PIL handler in trap table. */
12782 +void perfctr_irq(int irq, struct pt_regs *regs)
12783 +{
12784 + clear_softint(1 << irq);
12785 + perf_irq(regs);
12786 +}
12787 +
12788 +int register_perfctr_intr(void (*handler)(struct pt_regs *))
12789 +{
12790 + int ret;
12791 +
12792 + if (!handler)
12793 + return -EINVAL;
12794 +
12795 + spin_lock(&perf_irq_lock);
12796 + if (perf_irq != unhandled_perf_irq) {
12797 + printk(KERN_WARNING "register_perfctr_intr: "
12798 + "perf IRQ busy (reserved by caller %p)\n",
12799 + perf_irq_owner_caller);
12800 + ret = -EBUSY;
12801 + goto out;
12802 + }
12803 +
12804 + perf_irq_owner_caller = __builtin_return_address(0);
12805 + perf_irq = handler;
12806 +
12807 + ret = 0;
12808 +out:
12809 + spin_unlock(&perf_irq_lock);
12810 +
12811 + return ret;
12812 +}
12813 +EXPORT_SYMBOL_GPL(register_perfctr_intr);
12814 +
12815 +void release_perfctr_intr(void (*handler)(struct pt_regs *))
12816 +{
12817 + spin_lock(&perf_irq_lock);
12818 + perf_irq_owner_caller = NULL;
12819 + perf_irq = unhandled_perf_irq;
12820 + spin_unlock(&perf_irq_lock);
12821 +}
12822 +EXPORT_SYMBOL_GPL(release_perfctr_intr);
12823 +
12824 +
12825 #ifdef CONFIG_HOTPLUG_CPU
12826 void fixup_irqs(void)
12827 {
12828 diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
12829 index 15f4178..7282d21 100644
12830 --- a/arch/sparc64/kernel/process.c
12831 +++ b/arch/sparc64/kernel/process.c
12832 @@ -30,6 +30,7 @@
12833 #include <linux/cpu.h>
12834 #include <linux/elfcore.h>
12835 #include <linux/sysrq.h>
12836 +#include <linux/perfmon_kern.h>
12837
12838 #include <asm/oplib.h>
12839 #include <asm/uaccess.h>
12840 @@ -385,11 +386,7 @@ void exit_thread(void)
12841 t->utraps[0]--;
12842 }
12843
12844 - if (test_and_clear_thread_flag(TIF_PERFCTR)) {
12845 - t->user_cntd0 = t->user_cntd1 = NULL;
12846 - t->pcr_reg = 0;
12847 - write_pcr(0);
12848 - }
12849 + pfm_exit_thread();
12850 }
12851
12852 void flush_thread(void)
12853 @@ -411,13 +408,6 @@ void flush_thread(void)
12854
12855 set_thread_wsaved(0);
12856
12857 - /* Turn off performance counters if on. */
12858 - if (test_and_clear_thread_flag(TIF_PERFCTR)) {
12859 - t->user_cntd0 = t->user_cntd1 = NULL;
12860 - t->pcr_reg = 0;
12861 - write_pcr(0);
12862 - }
12863 -
12864 /* Clear FPU register state. */
12865 t->fpsaved[0] = 0;
12866
12867 @@ -631,16 +621,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
12868 t->kregs->u_regs[UREG_FP] =
12869 ((unsigned long) child_sf) - STACK_BIAS;
12870
12871 - /* Special case, if we are spawning a kernel thread from
12872 - * a userspace task (usermode helper, NFS or similar), we
12873 - * must disable performance counters in the child because
12874 - * the address space and protection realm are changing.
12875 - */
12876 - if (t->flags & _TIF_PERFCTR) {
12877 - t->user_cntd0 = t->user_cntd1 = NULL;
12878 - t->pcr_reg = 0;
12879 - t->flags &= ~_TIF_PERFCTR;
12880 - }
12881 t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
12882 t->kregs->u_regs[UREG_G6] = (unsigned long) t;
12883 t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
12884 @@ -673,6 +653,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
12885 if (clone_flags & CLONE_SETTLS)
12886 t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
12887
12888 + pfm_copy_thread(p);
12889 +
12890 return 0;
12891 }
12892
12893 diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
12894 index 97a993c..c2af29d 100644
12895 --- a/arch/sparc64/kernel/rtrap.S
12896 +++ b/arch/sparc64/kernel/rtrap.S
12897 @@ -65,55 +65,14 @@ __handle_user_windows:
12898 ba,pt %xcc, __handle_user_windows_continue
12899
12900 andn %l1, %l4, %l1
12901 -__handle_perfctrs:
12902 - call update_perfctrs
12903 - wrpr %g0, RTRAP_PSTATE, %pstate
12904 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12905 - ldub [%g6 + TI_WSAVED], %o2
12906 - brz,pt %o2, 1f
12907 - nop
12908 - /* Redo userwin+sched+sig checks */
12909 - call fault_in_user_windows
12910 -
12911 - wrpr %g0, RTRAP_PSTATE, %pstate
12912 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12913 - ldx [%g6 + TI_FLAGS], %l0
12914 - andcc %l0, _TIF_NEED_RESCHED, %g0
12915 - be,pt %xcc, 1f
12916 -
12917 - nop
12918 - call schedule
12919 - wrpr %g0, RTRAP_PSTATE, %pstate
12920 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12921 - ldx [%g6 + TI_FLAGS], %l0
12922 -1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
12923 -
12924 - be,pt %xcc, __handle_perfctrs_continue
12925 - sethi %hi(TSTATE_PEF), %o0
12926 - mov %l5, %o1
12927 - add %sp, PTREGS_OFF, %o0
12928 - mov %l0, %o2
12929 - call do_notify_resume
12930 -
12931 - wrpr %g0, RTRAP_PSTATE, %pstate
12932 - wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
12933 - /* Signal delivery can modify pt_regs tstate, so we must
12934 - * reload it.
12935 - */
12936 - ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
12937 - sethi %hi(0xf << 20), %l4
12938 - and %l1, %l4, %l4
12939 - andn %l1, %l4, %l1
12940 - ba,pt %xcc, __handle_perfctrs_continue
12941 -
12942 - sethi %hi(TSTATE_PEF), %o0
12943 __handle_userfpu:
12944 rd %fprs, %l5
12945 andcc %l5, FPRS_FEF, %g0
12946 sethi %hi(TSTATE_PEF), %o0
12947 be,a,pn %icc, __handle_userfpu_continue
12948 andn %l1, %o0, %l1
12949 - ba,a,pt %xcc, __handle_userfpu_continue
12950 + ba,pt %xcc, __handle_userfpu_continue
12951 + nop
12952
12953 __handle_signal:
12954 mov %l5, %o1
12955 @@ -202,12 +161,8 @@ __handle_signal_continue:
12956 brnz,pn %o2, __handle_user_windows
12957 nop
12958 __handle_user_windows_continue:
12959 - ldx [%g6 + TI_FLAGS], %l5
12960 - andcc %l5, _TIF_PERFCTR, %g0
12961 sethi %hi(TSTATE_PEF), %o0
12962 - bne,pn %xcc, __handle_perfctrs
12963 -__handle_perfctrs_continue:
12964 - andcc %l1, %o0, %g0
12965 + andcc %l1, %o0, %g0
12966
12967 /* This fpdepth clear is necessary for non-syscall rtraps only */
12968 user_nowork:
12969 diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
12970 index c8b03a4..248aa1f 100644
12971 --- a/arch/sparc64/kernel/setup.c
12972 +++ b/arch/sparc64/kernel/setup.c
12973 @@ -352,6 +352,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
12974 seq_printf(m,
12975 "cpu\t\t: %s\n"
12976 "fpu\t\t: %s\n"
12977 + "pmu\t\t: %s\n"
12978 "prom\t\t: %s\n"
12979 "type\t\t: %s\n"
12980 "ncpus probed\t: %d\n"
12981 @@ -364,6 +365,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
12982 ,
12983 sparc_cpu_type,
12984 sparc_fpu_type,
12985 + sparc_pmu_type,
12986 prom_version,
12987 ((tlb_type == hypervisor) ?
12988 "sun4v" :
12989 diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
12990 index ec82d76..cea1082 100644
12991 --- a/arch/sparc64/kernel/signal.c
12992 +++ b/arch/sparc64/kernel/signal.c
12993 @@ -23,6 +23,7 @@
12994 #include <linux/tty.h>
12995 #include <linux/binfmts.h>
12996 #include <linux/bitops.h>
12997 +#include <linux/perfmon_kern.h>
12998
12999 #include <asm/uaccess.h>
13000 #include <asm/ptrace.h>
13001 @@ -608,6 +609,9 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
13002
13003 void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
13004 {
13005 + if (thread_info_flags & _TIF_PERFMON_WORK)
13006 + pfm_handle_work(regs);
13007 +
13008 if (thread_info_flags & _TIF_SIGPENDING)
13009 do_signal(regs, orig_i0);
13010 if (thread_info_flags & _TIF_NOTIFY_RESUME) {
13011 diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
13012 index 39749e3..384004b 100644
13013 --- a/arch/sparc64/kernel/sys_sparc.c
13014 +++ b/arch/sparc64/kernel/sys_sparc.c
13015 @@ -26,7 +26,6 @@
13016
13017 #include <asm/uaccess.h>
13018 #include <asm/utrap.h>
13019 -#include <asm/perfctr.h>
13020 #include <asm/unistd.h>
13021
13022 #include "entry.h"
13023 @@ -791,106 +790,10 @@ asmlinkage long sys_rt_sigaction(int sig,
13024 return ret;
13025 }
13026
13027 -/* Invoked by rtrap code to update performance counters in
13028 - * user space.
13029 - */
13030 -asmlinkage void update_perfctrs(void)
13031 -{
13032 - unsigned long pic, tmp;
13033 -
13034 - read_pic(pic);
13035 - tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
13036 - __put_user(tmp, current_thread_info()->user_cntd0);
13037 - tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
13038 - __put_user(tmp, current_thread_info()->user_cntd1);
13039 - reset_pic();
13040 -}
13041 -
13042 asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1, unsigned long arg2)
13043 {
13044 - int err = 0;
13045 -
13046 - switch(opcode) {
13047 - case PERFCTR_ON:
13048 - current_thread_info()->pcr_reg = arg2;
13049 - current_thread_info()->user_cntd0 = (u64 __user *) arg0;
13050 - current_thread_info()->user_cntd1 = (u64 __user *) arg1;
13051 - current_thread_info()->kernel_cntd0 =
13052 - current_thread_info()->kernel_cntd1 = 0;
13053 - write_pcr(arg2);
13054 - reset_pic();
13055 - set_thread_flag(TIF_PERFCTR);
13056 - break;
13057 -
13058 - case PERFCTR_OFF:
13059 - err = -EINVAL;
13060 - if (test_thread_flag(TIF_PERFCTR)) {
13061 - current_thread_info()->user_cntd0 =
13062 - current_thread_info()->user_cntd1 = NULL;
13063 - current_thread_info()->pcr_reg = 0;
13064 - write_pcr(0);
13065 - clear_thread_flag(TIF_PERFCTR);
13066 - err = 0;
13067 - }
13068 - break;
13069 -
13070 - case PERFCTR_READ: {
13071 - unsigned long pic, tmp;
13072 -
13073 - if (!test_thread_flag(TIF_PERFCTR)) {
13074 - err = -EINVAL;
13075 - break;
13076 - }
13077 - read_pic(pic);
13078 - tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
13079 - err |= __put_user(tmp, current_thread_info()->user_cntd0);
13080 - tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
13081 - err |= __put_user(tmp, current_thread_info()->user_cntd1);
13082 - reset_pic();
13083 - break;
13084 - }
13085 -
13086 - case PERFCTR_CLRPIC:
13087 - if (!test_thread_flag(TIF_PERFCTR)) {
13088 - err = -EINVAL;
13089 - break;
13090 - }
13091 - current_thread_info()->kernel_cntd0 =
13092 - current_thread_info()->kernel_cntd1 = 0;
13093 - reset_pic();
13094 - break;
13095 -
13096 - case PERFCTR_SETPCR: {
13097 - u64 __user *user_pcr = (u64 __user *)arg0;
13098 -
13099 - if (!test_thread_flag(TIF_PERFCTR)) {
13100 - err = -EINVAL;
13101 - break;
13102 - }
13103 - err |= __get_user(current_thread_info()->pcr_reg, user_pcr);
13104 - write_pcr(current_thread_info()->pcr_reg);
13105 - current_thread_info()->kernel_cntd0 =
13106 - current_thread_info()->kernel_cntd1 = 0;
13107 - reset_pic();
13108 - break;
13109 - }
13110 -
13111 - case PERFCTR_GETPCR: {
13112 - u64 __user *user_pcr = (u64 __user *)arg0;
13113 -
13114 - if (!test_thread_flag(TIF_PERFCTR)) {
13115 - err = -EINVAL;
13116 - break;
13117 - }
13118 - err |= __put_user(current_thread_info()->pcr_reg, user_pcr);
13119 - break;
13120 - }
13121 -
13122 - default:
13123 - err = -EINVAL;
13124 - break;
13125 - };
13126 - return err;
13127 + /* Superseded by perfmon2 */
13128 + return -ENOSYS;
13129 }
13130
13131 /*
13132 diff --git a/arch/sparc64/kernel/syscalls.S b/arch/sparc64/kernel/syscalls.S
13133 index a2f2427..b20bf1e 100644
13134 --- a/arch/sparc64/kernel/syscalls.S
13135 +++ b/arch/sparc64/kernel/syscalls.S
13136 @@ -117,26 +117,9 @@ ret_from_syscall:
13137 stb %g0, [%g6 + TI_NEW_CHILD]
13138 ldx [%g6 + TI_FLAGS], %l0
13139 call schedule_tail
13140 - mov %g7, %o0
13141 - andcc %l0, _TIF_PERFCTR, %g0
13142 - be,pt %icc, 1f
13143 - nop
13144 - ldx [%g6 + TI_PCR], %o7
13145 - wr %g0, %o7, %pcr
13146 -
13147 - /* Blackbird errata workaround. See commentary in
13148 - * smp.c:smp_percpu_timer_interrupt() for more
13149 - * information.
13150 - */
13151 - ba,pt %xcc, 99f
13152 - nop
13153 -
13154 - .align 64
13155 -99: wr %g0, %g0, %pic
13156 - rd %pic, %g0
13157 -
13158 -1: ba,pt %xcc, ret_sys_call
13159 - ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
13160 + mov %g7, %o0
13161 + ba,pt %xcc, ret_sys_call
13162 + ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
13163
13164 .globl sparc_exit
13165 .type sparc_exit,#function
13166 diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
13167 index 0fdbf3b..1a1a296 100644
13168 --- a/arch/sparc64/kernel/systbls.S
13169 +++ b/arch/sparc64/kernel/systbls.S
13170 @@ -82,7 +82,9 @@ sys_call_table32:
13171 .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
13172 /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
13173 .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
13174 -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
13175 +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
13176 + .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
13177 +/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
13178
13179 #endif /* CONFIG_COMPAT */
13180
13181 @@ -156,4 +158,6 @@ sys_call_table:
13182 .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
13183 /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
13184 .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
13185 -/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
13186 +/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
13187 + .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
13188 +/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
13189 diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
13190 index c824df1..be45d09 100644
13191 --- a/arch/sparc64/kernel/traps.c
13192 +++ b/arch/sparc64/kernel/traps.c
13193 @@ -2470,86 +2470,90 @@ extern void tsb_config_offsets_are_bolixed_dave(void);
13194 /* Only invoked on boot processor. */
13195 void __init trap_init(void)
13196 {
13197 - /* Compile time sanity check. */
13198 - if (TI_TASK != offsetof(struct thread_info, task) ||
13199 - TI_FLAGS != offsetof(struct thread_info, flags) ||
13200 - TI_CPU != offsetof(struct thread_info, cpu) ||
13201 - TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
13202 - TI_KSP != offsetof(struct thread_info, ksp) ||
13203 - TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) ||
13204 - TI_KREGS != offsetof(struct thread_info, kregs) ||
13205 - TI_UTRAPS != offsetof(struct thread_info, utraps) ||
13206 - TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) ||
13207 - TI_REG_WINDOW != offsetof(struct thread_info, reg_window) ||
13208 - TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) ||
13209 - TI_GSR != offsetof(struct thread_info, gsr) ||
13210 - TI_XFSR != offsetof(struct thread_info, xfsr) ||
13211 - TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) ||
13212 - TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) ||
13213 - TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
13214 - TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
13215 - TI_PCR != offsetof(struct thread_info, pcr_reg) ||
13216 - TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
13217 - TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
13218 - TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
13219 - TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
13220 - TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
13221 - TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
13222 - TI_FPREGS != offsetof(struct thread_info, fpregs) ||
13223 - (TI_FPREGS & (64 - 1)))
13224 - thread_info_offsets_are_bolixed_dave();
13225 -
13226 - if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
13227 - (TRAP_PER_CPU_PGD_PADDR !=
13228 - offsetof(struct trap_per_cpu, pgd_paddr)) ||
13229 - (TRAP_PER_CPU_CPU_MONDO_PA !=
13230 - offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
13231 - (TRAP_PER_CPU_DEV_MONDO_PA !=
13232 - offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
13233 - (TRAP_PER_CPU_RESUM_MONDO_PA !=
13234 - offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
13235 - (TRAP_PER_CPU_RESUM_KBUF_PA !=
13236 - offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
13237 - (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
13238 - offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
13239 - (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
13240 - offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
13241 - (TRAP_PER_CPU_FAULT_INFO !=
13242 - offsetof(struct trap_per_cpu, fault_info)) ||
13243 - (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
13244 - offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
13245 - (TRAP_PER_CPU_CPU_LIST_PA !=
13246 - offsetof(struct trap_per_cpu, cpu_list_pa)) ||
13247 - (TRAP_PER_CPU_TSB_HUGE !=
13248 - offsetof(struct trap_per_cpu, tsb_huge)) ||
13249 - (TRAP_PER_CPU_TSB_HUGE_TEMP !=
13250 - offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
13251 - (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
13252 - offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
13253 - (TRAP_PER_CPU_CPU_MONDO_QMASK !=
13254 - offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
13255 - (TRAP_PER_CPU_DEV_MONDO_QMASK !=
13256 - offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
13257 - (TRAP_PER_CPU_RESUM_QMASK !=
13258 - offsetof(struct trap_per_cpu, resum_qmask)) ||
13259 - (TRAP_PER_CPU_NONRESUM_QMASK !=
13260 - offsetof(struct trap_per_cpu, nonresum_qmask)))
13261 - trap_per_cpu_offsets_are_bolixed_dave();
13262 -
13263 - if ((TSB_CONFIG_TSB !=
13264 - offsetof(struct tsb_config, tsb)) ||
13265 - (TSB_CONFIG_RSS_LIMIT !=
13266 - offsetof(struct tsb_config, tsb_rss_limit)) ||
13267 - (TSB_CONFIG_NENTRIES !=
13268 - offsetof(struct tsb_config, tsb_nentries)) ||
13269 - (TSB_CONFIG_REG_VAL !=
13270 - offsetof(struct tsb_config, tsb_reg_val)) ||
13271 - (TSB_CONFIG_MAP_VADDR !=
13272 - offsetof(struct tsb_config, tsb_map_vaddr)) ||
13273 - (TSB_CONFIG_MAP_PTE !=
13274 - offsetof(struct tsb_config, tsb_map_pte)))
13275 - tsb_config_offsets_are_bolixed_dave();
13276 -
13277 + BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task));
13278 + BUILD_BUG_ON(TI_FLAGS != offsetof(struct thread_info, flags));
13279 + BUILD_BUG_ON(TI_CPU != offsetof(struct thread_info, cpu));
13280 + BUILD_BUG_ON(TI_FPSAVED != offsetof(struct thread_info, fpsaved));
13281 + BUILD_BUG_ON(TI_KSP != offsetof(struct thread_info, ksp));
13282 + BUILD_BUG_ON(TI_FAULT_ADDR !=
13283 + offsetof(struct thread_info, fault_address));
13284 + BUILD_BUG_ON(TI_KREGS != offsetof(struct thread_info, kregs));
13285 + BUILD_BUG_ON(TI_UTRAPS != offsetof(struct thread_info, utraps));
13286 + BUILD_BUG_ON(TI_EXEC_DOMAIN !=
13287 + offsetof(struct thread_info, exec_domain));
13288 + BUILD_BUG_ON(TI_REG_WINDOW !=
13289 + offsetof(struct thread_info, reg_window));
13290 + BUILD_BUG_ON(TI_RWIN_SPTRS !=
13291 + offsetof(struct thread_info, rwbuf_stkptrs));
13292 + BUILD_BUG_ON(TI_GSR != offsetof(struct thread_info, gsr));
13293 + BUILD_BUG_ON(TI_XFSR != offsetof(struct thread_info, xfsr));
13294 + BUILD_BUG_ON(TI_PRE_COUNT !=
13295 + offsetof(struct thread_info, preempt_count));
13296 + BUILD_BUG_ON(TI_NEW_CHILD !=
13297 + offsetof(struct thread_info, new_child));
13298 + BUILD_BUG_ON(TI_SYS_NOERROR !=
13299 + offsetof(struct thread_info, syscall_noerror));
13300 + BUILD_BUG_ON(TI_RESTART_BLOCK !=
13301 + offsetof(struct thread_info, restart_block));
13302 + BUILD_BUG_ON(TI_KUNA_REGS !=
13303 + offsetof(struct thread_info, kern_una_regs));
13304 + BUILD_BUG_ON(TI_KUNA_INSN !=
13305 + offsetof(struct thread_info, kern_una_insn));
13306 + BUILD_BUG_ON(TI_FPREGS != offsetof(struct thread_info, fpregs));
13307 + BUILD_BUG_ON((TI_FPREGS & (64 - 1)));
13308 +
13309 + BUILD_BUG_ON(TRAP_PER_CPU_THREAD !=
13310 + offsetof(struct trap_per_cpu, thread));
13311 + BUILD_BUG_ON(TRAP_PER_CPU_PGD_PADDR !=
13312 + offsetof(struct trap_per_cpu, pgd_paddr));
13313 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_PA !=
13314 + offsetof(struct trap_per_cpu, cpu_mondo_pa));
13315 + BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_PA !=
13316 + offsetof(struct trap_per_cpu, dev_mondo_pa));
13317 + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_MONDO_PA !=
13318 + offsetof(struct trap_per_cpu, resum_mondo_pa));
13319 + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_KBUF_PA !=
13320 + offsetof(struct trap_per_cpu, resum_kernel_buf_pa));
13321 + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_MONDO_PA !=
13322 + offsetof(struct trap_per_cpu, nonresum_mondo_pa));
13323 + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_KBUF_PA !=
13324 + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa));
13325 + BUILD_BUG_ON(TRAP_PER_CPU_FAULT_INFO !=
13326 + offsetof(struct trap_per_cpu, fault_info));
13327 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
13328 + offsetof(struct trap_per_cpu, cpu_mondo_block_pa));
13329 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_LIST_PA !=
13330 + offsetof(struct trap_per_cpu, cpu_list_pa));
13331 + BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE !=
13332 + offsetof(struct trap_per_cpu, tsb_huge));
13333 + BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE_TEMP !=
13334 + offsetof(struct trap_per_cpu, tsb_huge_temp));
13335 +#if 0
13336 + BUILD_BUG_ON(TRAP_PER_CPU_IRQ_WORKLIST !=
13337 + offsetof(struct trap_per_cpu, irq_worklist));
13338 +#endif
13339 + BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_QMASK !=
13340 + offsetof(struct trap_per_cpu, cpu_mondo_qmask));
13341 + BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_QMASK !=
13342 + offsetof(struct trap_per_cpu, dev_mondo_qmask));
13343 + BUILD_BUG_ON(TRAP_PER_CPU_RESUM_QMASK !=
13344 + offsetof(struct trap_per_cpu, resum_qmask));
13345 + BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_QMASK !=
13346 + offsetof(struct trap_per_cpu, nonresum_qmask));
13347 +
13348 + BUILD_BUG_ON(TSB_CONFIG_TSB !=
13349 + offsetof(struct tsb_config, tsb));
13350 + BUILD_BUG_ON(TSB_CONFIG_RSS_LIMIT !=
13351 + offsetof(struct tsb_config, tsb_rss_limit));
13352 + BUILD_BUG_ON(TSB_CONFIG_NENTRIES !=
13353 + offsetof(struct tsb_config, tsb_nentries));
13354 + BUILD_BUG_ON(TSB_CONFIG_REG_VAL !=
13355 + offsetof(struct tsb_config, tsb_reg_val));
13356 + BUILD_BUG_ON(TSB_CONFIG_MAP_VADDR !=
13357 + offsetof(struct tsb_config, tsb_map_vaddr));
13358 + BUILD_BUG_ON(TSB_CONFIG_MAP_PTE !=
13359 + offsetof(struct tsb_config, tsb_map_pte));
13360 +
13361 /* Attach to the address space of init_task. On SMP we
13362 * do this in smp.c:smp_callin for other cpus.
13363 */
13364 diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
13365 index 1ade3d6..2a31ffa 100644
13366 --- a/arch/sparc64/kernel/ttable.S
13367 +++ b/arch/sparc64/kernel/ttable.S
13368 @@ -66,7 +66,7 @@ tl0_irq6: BTRAP(0x46)
13369 tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49)
13370 tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d)
13371 tl0_irq14: TRAP_IRQ(timer_interrupt, 14)
13372 -tl0_irq15: TRAP_IRQ(handler_irq, 15)
13373 +tl0_irq15: TRAP_IRQ(perfctr_irq, 15)
13374 tl0_resv050: BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55)
13375 tl0_resv056: BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b)
13376 tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f)
13377 diff --git a/arch/sparc64/perfmon/Kconfig b/arch/sparc64/perfmon/Kconfig
13378 new file mode 100644
13379 index 0000000..4672024
13380 --- /dev/null
13381 +++ b/arch/sparc64/perfmon/Kconfig
13382 @@ -0,0 +1,26 @@
13383 +menu "Hardware Performance Monitoring support"
13384 +config PERFMON
13385 + bool "Perfmon2 performance monitoring interface"
13386 + default n
13387 + help
13388 + Enables the perfmon2 interface to access the hardware
13389 + performance counters. See <http://perfmon2.sf.net/> for
13390 + more details.
13391 +
13392 +config PERFMON_DEBUG
13393 + bool "Perfmon debugging"
13394 + depends on PERFMON
13395 + default n
13396 + help
13397 + Enables perfmon debugging support
13398 +
13399 +config PERFMON_DEBUG_FS
13400 + bool "Enable perfmon statistics reporting via debugfs"
13401 + default y
13402 + depends on PERFMON && DEBUG_FS
13403 + help
13404 + Enable collection and reporting of perfmon timing statistics under
13405 + debugfs. This is used for debugging and performance analysis of the
13406 + subsystem. The debugfs filesystem must be mounted.
13407 +
13408 +endmenu
13409 diff --git a/arch/sparc64/perfmon/Makefile b/arch/sparc64/perfmon/Makefile
13410 new file mode 100644
13411 index 0000000..ad2d907
13412 --- /dev/null
13413 +++ b/arch/sparc64/perfmon/Makefile
13414 @@ -0,0 +1 @@
13415 +obj-$(CONFIG_PERFMON) += perfmon.o
13416 diff --git a/arch/sparc64/perfmon/perfmon.c b/arch/sparc64/perfmon/perfmon.c
13417 new file mode 100644
13418 index 0000000..9e29833
13419 --- /dev/null
13420 +++ b/arch/sparc64/perfmon/perfmon.c
13421 @@ -0,0 +1,422 @@
13422 +/* perfmon.c: sparc64 perfmon support
13423 + *
13424 + * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
13425 + */
13426 +
13427 +#include <linux/kernel.h>
13428 +#include <linux/module.h>
13429 +#include <linux/irq.h>
13430 +#include <linux/perfmon_kern.h>
13431 +
13432 +#include <asm/system.h>
13433 +#include <asm/spitfire.h>
13434 +#include <asm/hypervisor.h>
13435 +
13436 +struct pcr_ops {
13437 + void (*write)(u64);
13438 + u64 (*read)(void);
13439 +};
13440 +
13441 +static void direct_write_pcr(u64 val)
13442 +{
13443 + write_pcr(val);
13444 +}
13445 +
13446 +static u64 direct_read_pcr(void)
13447 +{
13448 + u64 pcr;
13449 +
13450 + read_pcr(pcr);
13451 +
13452 + return pcr;
13453 +}
13454 +
13455 +static struct pcr_ops direct_pcr_ops = {
13456 + .write = direct_write_pcr,
13457 + .read = direct_read_pcr,
13458 +};
13459 +
13460 +/* The hypervisor call is needed to set the hyperprivileged hypervisor
13461 + * trace bit; fall back to a direct PCR write if the hypervisor call fails.
13462 + */
13463 +static void n2_write_pcr(u64 val)
13464 +{
13465 + unsigned long ret;
13466 +
13467 + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
13468 + if (ret != HV_EOK)
13469 + write_pcr(val);
13470 +}
13471 +
13472 +static u64 n2_read_pcr(void)
13473 +{
13474 + u64 pcr;
13475 +
13476 + read_pcr(pcr);
13477 +
13478 + return pcr;
13479 +}
13480 +
13481 +static struct pcr_ops n2_pcr_ops = {
13482 + .write = n2_write_pcr,
13483 + .read = n2_read_pcr,
13484 +};
13485 +
13486 +static struct pcr_ops *pcr_ops;
13487 +
13488 +void pfm_arch_write_pmc(struct pfm_context *ctx,
13489 + unsigned int cnum, u64 value)
13490 +{
13491 + /*
13492 + * we only write to the actual register when monitoring is
13493 + * active (pfm_start was issued)
13494 + */
13495 + if (ctx && ctx->flags.started == 0)
13496 + return;
13497 +
13498 + pcr_ops->write(value);
13499 +}
13500 +
13501 +u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
13502 +{
13503 + return pcr_ops->read();
13504 +}
13505 +
13506 +/*
13507 + * collect pending overflowed PMDs. Called from pfm_ctxsw()
13508 + * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
13509 + * and set->npend_ovfls. Interrupts are masked
13510 + */
13511 +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
13512 +{
13513 + unsigned int max = ctx->regs.max_intr_pmd;
13514 + u64 wmask = 1ULL << pfm_pmu_conf->counter_width;
13515 + u64 *intr_pmds = ctx->regs.intr_pmds;
13516 + u64 *used_mask = set->used_pmds;
13517 + u64 mask[PFM_PMD_BV];
13518 + unsigned int i;
13519 +
13520 + bitmap_and(cast_ulp(mask),
13521 + cast_ulp(intr_pmds),
13522 + cast_ulp(used_mask),
13523 + max);
13524 +
13525 + /*
13526 + * check all PMD that can generate interrupts
13527 + * (that includes counters)
13528 + */
13529 + for (i = 0; i < max; i++) {
13530 + if (test_bit(i, mask)) {
13531 + u64 new_val = pfm_arch_read_pmd(ctx, i);
13532 +
13533 + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
13534 + i, (unsigned long long)new_val,
13535 + (new_val&wmask) ? 1 : 0);
13536 +
13537 + if (new_val & wmask) {
13538 + __set_bit(i, set->povfl_pmds);
13539 + set->npend_ovfls++;
13540 + }
13541 + }
13542 + }
13543 +}
13544 +
13545 +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
13546 + struct pfm_event_set *set)
13547 +{
13548 + unsigned int i, max = ctx->regs.max_pmc;
13549 +
13550 + /*
13551 + * clear enable bits, assume all pmcs are enable pmcs
13552 + */
13553 + for (i = 0; i < max; i++) {
13554 + if (test_bit(i, set->used_pmcs))
13555 + pfm_arch_write_pmc(ctx, i, 0);
13556 + }
13557 +
13558 + if (set->npend_ovfls)
13559 + return;
13560 +
13561 + __pfm_get_ovfl_pmds(ctx, set);
13562 +}
13563 +
13564 +/*
13565 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
13566 + * Context is locked. Interrupts are masked. Monitoring is active.
13567 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
13568 + *
13569 + * for per-thread:
13570 + * must stop monitoring for the task
13571 + *
13572 + * Return:
13573 + * non-zero : did not save PMDs (as part of stopping the PMU)
13574 + * 0 : saved PMDs (no need to save them in caller)
13575 + */
13576 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
13577 +{
13578 + /*
13579 + * disable lazy restore of PMC registers.
13580 + */
13581 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
13582 +
13583 + pfm_stop_active(task, ctx, ctx->active_set);
13584 +
13585 + return 1;
13586 +}
13587 +
13588 +/*
13589 + * Called from pfm_stop() and idle notifier
13590 + *
13591 + * Interrupts are masked. Context is locked. Set is the active set.
13592 + *
13593 + * For per-thread:
13594 + * task is not necessarily current. If not current task, then
13595 + * task is guaranteed stopped and off any cpu. Access to PMU
13596 + * is not guaranteed. Interrupts are masked. Context is locked.
13597 + * Set is the active set.
13598 + *
13599 + * For system-wide:
13600 + * task is current
13601 + *
13602 + * must disable active monitoring. ctx cannot be NULL
13603 + */
13604 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
13605 +{
13606 + /*
13607 + * nothing to stop if monitoring was never started
13608 + * or if the context is in the PFM_CTX_MASKED state
13609 + */
13610 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
13611 + return;
13612 +
13613 + /*
13614 + * only when task is current: stop the active set on the live PMU
13615 + */
13616 + if (task == current)
13617 + pfm_stop_active(task, ctx, ctx->active_set);
13618 +}
13619 +
13620 +/*
13621 + * Enable active monitoring. Called from pfm_start() and
13622 + * pfm_arch_unmask_monitoring().
13623 + *
13624 + * Interrupts are masked. Context is locked. Set is the active set.
13625 + *
13626 + * For per-thread:
13627 + * Task is not necessarily current. If not current task, then task
13628 + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
13629 + *
13630 + * For system-wide:
13631 + * task is always current
13632 + *
13633 + * must enable active monitoring.
13634 + */
13635 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
13636 +{
13637 + struct pfm_event_set *set;
13638 + unsigned int max_pmc = ctx->regs.max_pmc;
13639 + unsigned int i;
13640 +
13641 + if (task != current)
13642 + return;
13643 +
13644 + set = ctx->active_set;
13645 + for (i = 0; i < max_pmc; i++) {
13646 + if (test_bit(i, set->used_pmcs))
13647 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
13648 + }
13649 +}
13650 +
13651 +/*
13652 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
13653 + * pfm_context_load_sys(), pfm_ctxsw().
13654 + * context is locked. Interrupts are masked. set cannot be NULL.
13655 + * Access to the PMU is guaranteed.
13656 + *
13657 + * function must restore all PMD registers from set.
13658 + */
13659 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
13660 +{
13661 + unsigned int max_pmd = ctx->regs.max_pmd;
13662 + u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
13663 + u64 *impl_pmds = ctx->regs.pmds;
13664 + unsigned int i;
13665 +
13666 + /*
13667 + * must restore all pmds to avoid leaking
13668 + * information to user.
13669 + */
13670 + for (i = 0; i < max_pmd; i++) {
13671 + u64 val;
13672 +
13673 + if (test_bit(i, impl_pmds) == 0)
13674 + continue;
13675 +
13676 + val = set->pmds[i].value;
13677 +
13678 + /*
13679 + * clear every bit not in ovfl_mask before the write.
13680 + * NOTE(review): this masks, it does not set, upper bits
13681 + */
13682 + val &= ovfl_mask;
13683 +
13684 + pfm_arch_write_pmd(ctx, i, val);
13685 + }
13686 +}
13687 +
13688 +/*
13689 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
13690 + * pfm_context_load_sys(), pfm_ctxsw().
13691 + * Context is locked. Interrupts are masked. set cannot be NULL.
13692 + * Access to the PMU is guaranteed.
13693 + *
13694 + * function must restore all PMC registers from set, if needed.
13695 + */
13696 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
13697 +{
13698 + unsigned int max_pmc = ctx->regs.max_pmc;
13699 + u64 *impl_pmcs = ctx->regs.pmcs;
13700 + unsigned int i;
13701 +
13702 + /* If we're masked or stopped we don't need to bother restoring
13703 + * the PMCs now.
13704 + */
13705 + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
13706 + return;
13707 +
13708 + /*
13709 + * restore all pmcs
13710 + */
13711 + for (i = 0; i < max_pmc; i++)
13712 + if (test_bit(i, impl_pmcs))
13713 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
13714 +}
13715 +
13716 +char *pfm_arch_get_pmu_module_name(void)
13717 +{
13718 + return NULL;
13719 +}
13720 +
13721 +void perfmon_interrupt(struct pt_regs *regs)
13722 +{
13723 + pfm_interrupt_handler(instruction_pointer(regs), regs);
13724 +}
13725 +
13726 +static struct pfm_regmap_desc pfm_sparc64_pmc_desc[] = {
13727 + PMC_D(PFM_REG_I, "PCR", 0, 0, 0, 0),
13728 +};
13729 +
13730 +static struct pfm_regmap_desc pfm_sparc64_pmd_desc[] = {
13731 + PMD_D(PFM_REG_C, "PIC0", 0),
13732 + PMD_D(PFM_REG_C, "PIC1", 0),
13733 +};
13734 +
13735 +static int pfm_sparc64_probe(void)
13736 +{
13737 + return 0;
13738 +}
13739 +
13740 +static struct pfm_pmu_config pmu_sparc64_pmu_conf = {
13741 + .counter_width = 31,
13742 + .pmd_desc = pfm_sparc64_pmd_desc,
13743 + .num_pmd_entries = 2,
13744 + .pmc_desc = pfm_sparc64_pmc_desc,
13745 + .num_pmc_entries = 1,
13746 + .probe_pmu = pfm_sparc64_probe,
13747 + .flags = PFM_PMU_BUILTIN_FLAG,
13748 + .owner = THIS_MODULE,
13749 +};
13750 +
13751 +static unsigned long perf_hsvc_group;
13752 +static unsigned long perf_hsvc_major;
13753 +static unsigned long perf_hsvc_minor;
13754 +
13755 +static int __init register_perf_hsvc(void)
13756 +{
13757 + if (tlb_type == hypervisor) {
13758 + switch (sun4v_chip_type) {
13759 + case SUN4V_CHIP_NIAGARA1:
13760 + perf_hsvc_group = HV_GRP_NIAGARA_CPU;
13761 + break;
13762 +
13763 + case SUN4V_CHIP_NIAGARA2:
13764 + perf_hsvc_group = HV_GRP_N2_CPU;
13765 + break;
13766 +
13767 + default:
13768 + return -ENODEV;
13769 + }
13770 +
13771 + /* negotiate version 1.0 of the API group selected for this chip */
13772 + perf_hsvc_major = 1;
13773 + perf_hsvc_minor = 0;
13774 + if (sun4v_hvapi_register(perf_hsvc_group,
13775 + perf_hsvc_major,
13776 + &perf_hsvc_minor)) {
13777 + printk("perfmon: Could not register hvapi.\n");
13778 + return -ENODEV;
13779 + }
13780 + }
13781 + return 0;
13782 +}
13783 +
13784 +static void unregister_perf_hsvc(void)
13785 +{
13786 + if (tlb_type != hypervisor)
13787 + return;
13788 + sun4v_hvapi_unregister(perf_hsvc_group);
13789 +}
13790 +
13791 +static int __init pfm_sparc64_pmu_init(void)
13792 +{
13793 + u64 mask;
13794 + int err;
13795 +
13796 + err = register_perf_hsvc();
13797 + if (err)
13798 + return err;
13799 +
13800 + if (tlb_type == hypervisor &&
13801 + sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
13802 + pcr_ops = &n2_pcr_ops;
13803 + else
13804 + pcr_ops = &direct_pcr_ops;
13805 +
13806 + if (!strcmp(sparc_pmu_type, "ultra12"))
13807 + mask = (0xf << 11) | (0xf << 4) | 0x7;
13808 + else if (!strcmp(sparc_pmu_type, "ultra3") ||
13809 + !strcmp(sparc_pmu_type, "ultra3i") ||
13810 + !strcmp(sparc_pmu_type, "ultra3+") ||
13811 + !strcmp(sparc_pmu_type, "ultra4+"))
13812 + mask = (0x3f << 11) | (0x3f << 4) | 0x7;
13813 + else if (!strcmp(sparc_pmu_type, "niagara2"))
13814 + mask = ((1UL << 63) | (1UL << 62) |
13815 + (1UL << 31) | (0xfUL << 27) | (0xffUL << 19) |
13816 + (1UL << 18) | (0xfUL << 14) | (0xff << 6) |
13817 + (0x3UL << 4) | 0x7UL);
13818 + else if (!strcmp(sparc_pmu_type, "niagara"))
13819 + mask = ((1UL << 9) | (1UL << 8) |
13820 + (0x7UL << 4) | 0x7UL);
13821 + else {
13822 + err = -ENODEV;
13823 + goto out_err;
13824 + }
13825 +
13826 + pmu_sparc64_pmu_conf.pmu_name = sparc_pmu_type;
13827 + pfm_sparc64_pmc_desc[0].rsvd_msk = ~mask;
13828 + err = pfm_pmu_register(&pmu_sparc64_pmu_conf);
13829 + if (!err)
13830 + return 0;
13831 +out_err: /* any failure: drop the hvapi registration taken above */
13832 + unregister_perf_hsvc();
13833 + return err;
13834 +}
13835 +
13836 +static void __exit pfm_sparc64_pmu_exit(void)
13837 +{
13838 + pfm_pmu_unregister(&pmu_sparc64_pmu_conf);
13839 + unregister_perf_hsvc(); /* last: reverse order of pfm_sparc64_pmu_init() */
13840 +}
13841 +
13842 +module_init(pfm_sparc64_pmu_init);
13843 +module_exit(pfm_sparc64_pmu_exit);
13844 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
13845 index ed92864..3a2b544 100644
13846 --- a/arch/x86/Kconfig
13847 +++ b/arch/x86/Kconfig
13848 @@ -1378,6 +1378,8 @@ config COMPAT_VDSO
13849
13850 If unsure, say Y.
13851
13852 +source "arch/x86/perfmon/Kconfig"
13853 +
13854 endmenu
13855
13856 config ARCH_ENABLE_MEMORY_HOTPLUG
13857 diff --git a/arch/x86/Makefile b/arch/x86/Makefile
13858 index f5631da..c868ad6 100644
13859 --- a/arch/x86/Makefile
13860 +++ b/arch/x86/Makefile
13861 @@ -150,6 +150,8 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
13862 core-y += arch/x86/kernel/
13863 core-y += arch/x86/mm/
13864
13865 +core-$(CONFIG_PERFMON) += arch/x86/perfmon/
13866 +
13867 # Remaining sub architecture files
13868 core-y += $(mcore-y)
13869
13870 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
13871 index ffc1bb4..58e00cb 100644
13872 --- a/arch/x86/ia32/ia32entry.S
13873 +++ b/arch/x86/ia32/ia32entry.S
13874 @@ -832,4 +832,16 @@ ia32_sys_call_table:
13875 .quad sys_dup3 /* 330 */
13876 .quad sys_pipe2
13877 .quad sys_inotify_init1
13878 + .quad sys_pfm_create_context
13879 + .quad sys_pfm_write_pmcs
13880 + .quad sys_pfm_write_pmds /* 335 */
13881 + .quad sys_pfm_read_pmds
13882 + .quad sys_pfm_load_context
13883 + .quad sys_pfm_start
13884 + .quad sys_pfm_stop
13885 + .quad sys_pfm_restart /* 340 */
13886 + .quad sys_pfm_create_evtsets
13887 + .quad sys_pfm_getinfo_evtsets
13888 + .quad sys_pfm_delete_evtsets
13889 + .quad sys_pfm_unload_context
13890 ia32_syscall_end:
13891 diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
13892 index f88bd0d..53fe335 100644
13893 --- a/arch/x86/kernel/apic_32.c
13894 +++ b/arch/x86/kernel/apic_32.c
13895 @@ -28,6 +28,7 @@
13896 #include <linux/acpi_pmtmr.h>
13897 #include <linux/module.h>
13898 #include <linux/dmi.h>
13899 +#include <linux/perfmon_kern.h>
13900
13901 #include <asm/atomic.h>
13902 #include <asm/smp.h>
13903 @@ -669,6 +670,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
13904 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
13905 return APIC_EILVT_LVTOFF_IBS;
13906 }
13907 +EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
13908
13909 /*
13910 * Local APIC start and shutdown
13911 @@ -1367,6 +1369,9 @@ void __init apic_intr_init(void)
13912 #ifdef CONFIG_X86_MCE_P4THERMAL
13913 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
13914 #endif
13915 +#ifdef CONFIG_PERFMON
13916 + alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
13917 +#endif /* CONFIG_PERFMON */
13918 }
13919
13920 /**
13921 diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
13922 index 446c062..574cd3b 100644
13923 --- a/arch/x86/kernel/apic_64.c
13924 +++ b/arch/x86/kernel/apic_64.c
13925 @@ -228,6 +228,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
13926 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
13927 return APIC_EILVT_LVTOFF_IBS;
13928 }
13929 +EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
13930
13931 /*
13932 * Program the next event, relative to now
13933 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
13934 index 4e456bd..5b6d6ca 100644
13935 --- a/arch/x86/kernel/cpu/common.c
13936 +++ b/arch/x86/kernel/cpu/common.c
13937 @@ -5,6 +5,7 @@
13938 #include <linux/module.h>
13939 #include <linux/percpu.h>
13940 #include <linux/bootmem.h>
13941 +#include <linux/perfmon_kern.h>
13942 #include <asm/processor.h>
13943 #include <asm/i387.h>
13944 #include <asm/msr.h>
13945 @@ -726,6 +727,8 @@ void __cpuinit cpu_init(void)
13946 current_thread_info()->status = 0;
13947 clear_used_math();
13948 mxcsr_feature_mask_init();
13949 +
13950 + pfm_init_percpu();
13951 }
13952
13953 #ifdef CONFIG_HOTPLUG_CPU
13954 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
13955 index 109792b..0b6e34c 100644
13956 --- a/arch/x86/kernel/entry_32.S
13957 +++ b/arch/x86/kernel/entry_32.S
13958 @@ -513,7 +513,7 @@ ENDPROC(system_call)
13959 ALIGN
13960 RING0_PTREGS_FRAME # can't unwind into user space anyway
13961 work_pending:
13962 - testb $_TIF_NEED_RESCHED, %cl
13963 + testw $(_TIF_NEED_RESCHED|_TIF_PERFMON_WORK), %cx
13964 jz work_notifysig
13965 work_resched:
13966 call schedule
13967 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
13968 index 89434d4..34e44f5 100644
13969 --- a/arch/x86/kernel/entry_64.S
13970 +++ b/arch/x86/kernel/entry_64.S
13971 @@ -888,7 +888,13 @@ END(error_interrupt)
13972 ENTRY(spurious_interrupt)
13973 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
13974 END(spurious_interrupt)
13975 -
13976 +
13977 +#ifdef CONFIG_PERFMON
13978 +ENTRY(pmu_interrupt)
13979 + apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt
13980 +END(pmu_interrupt)
13981 +#endif
13982 +
13983 /*
13984 * Exception entry points.
13985 */
13986 diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
13987 index 1f26fd9..83f6bc1 100644
13988 --- a/arch/x86/kernel/irqinit_64.c
13989 +++ b/arch/x86/kernel/irqinit_64.c
13990 @@ -11,6 +11,7 @@
13991 #include <linux/kernel_stat.h>
13992 #include <linux/sysdev.h>
13993 #include <linux/bitops.h>
13994 +#include <linux/perfmon_kern.h>
13995
13996 #include <asm/acpi.h>
13997 #include <asm/atomic.h>
13998 @@ -217,6 +218,10 @@ void __init native_init_IRQ(void)
13999 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
14000 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
14001
14002 +#ifdef CONFIG_PERFMON
14003 + alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
14004 +#endif
14005 +
14006 if (!acpi_ioapic)
14007 setup_irq(2, &irq2);
14008 }
14009 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
14010 index 31f40b2..ed27150 100644
14011 --- a/arch/x86/kernel/process_32.c
14012 +++ b/arch/x86/kernel/process_32.c
14013 @@ -36,6 +36,7 @@
14014 #include <linux/personality.h>
14015 #include <linux/tick.h>
14016 #include <linux/percpu.h>
14017 +#include <linux/perfmon_kern.h>
14018 #include <linux/prctl.h>
14019
14020 #include <asm/uaccess.h>
14021 @@ -277,6 +278,7 @@ void exit_thread(void)
14022 tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
14023 put_cpu();
14024 }
14025 + pfm_exit_thread();
14026 }
14027
14028 void flush_thread(void)
14029 @@ -334,6 +336,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
14030
14031 savesegment(gs, p->thread.gs);
14032
14033 + pfm_copy_thread(p);
14034 +
14035 tsk = current;
14036 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
14037 p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
14038 @@ -448,6 +452,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
14039 prev = &prev_p->thread;
14040 next = &next_p->thread;
14041
14042 + if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
14043 + pfm_ctxsw_out(prev_p, next_p);
14044 +
14045 debugctl = prev->debugctlmsr;
14046 if (next->ds_area_msr != prev->ds_area_msr) {
14047 /* we clear debugctl to make sure DS
14048 @@ -460,6 +467,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
14049 if (next->debugctlmsr != debugctl)
14050 update_debugctlmsr(next->debugctlmsr);
14051
14052 + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
14053 + pfm_ctxsw_in(prev_p, next_p);
14054 +
14055 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
14056 set_debugreg(next->debugreg0, 0);
14057 set_debugreg(next->debugreg1, 1);
14058 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
14059 index e12e0e4..97d49ce 100644
14060 --- a/arch/x86/kernel/process_64.c
14061 +++ b/arch/x86/kernel/process_64.c
14062 @@ -36,6 +36,7 @@
14063 #include <linux/kprobes.h>
14064 #include <linux/kdebug.h>
14065 #include <linux/tick.h>
14066 +#include <linux/perfmon_kern.h>
14067 #include <linux/prctl.h>
14068
14069 #include <asm/uaccess.h>
14070 @@ -240,6 +241,7 @@ void exit_thread(void)
14071 t->io_bitmap_max = 0;
14072 put_cpu();
14073 }
14074 + pfm_exit_thread();
14075 }
14076
14077 void flush_thread(void)
14078 @@ -344,6 +346,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
14079 savesegment(es, p->thread.es);
14080 savesegment(ds, p->thread.ds);
14081
14082 + pfm_copy_thread(p);
14083 +
14084 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
14085 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
14086 if (!p->thread.io_bitmap_ptr) {
14087 @@ -472,6 +476,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
14088 prev = &prev_p->thread,
14089 next = &next_p->thread;
14090
14091 + if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
14092 + pfm_ctxsw_out(prev_p, next_p);
14093 +
14094 debugctl = prev->debugctlmsr;
14095 if (next->ds_area_msr != prev->ds_area_msr) {
14096 /* we clear debugctl to make sure DS
14097 @@ -484,6 +491,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
14098 if (next->debugctlmsr != debugctl)
14099 update_debugctlmsr(next->debugctlmsr);
14100
14101 + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
14102 + pfm_ctxsw_in(prev_p, next_p);
14103 +
14104 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
14105 loaddebug(next, 0);
14106 loaddebug(next, 1);
14107 diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
14108 index 6fb5bcd..53e6665 100644
14109 --- a/arch/x86/kernel/signal_32.c
14110 +++ b/arch/x86/kernel/signal_32.c
14111 @@ -18,6 +18,7 @@
14112 #include <linux/sched.h>
14113 #include <linux/wait.h>
14114 #include <linux/elf.h>
14115 +#include <linux/perfmon_kern.h>
14116 #include <linux/smp.h>
14117 #include <linux/mm.h>
14118
14119 @@ -657,6 +658,10 @@ static void do_signal(struct pt_regs *regs)
14120 void
14121 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
14122 {
14123 + /* process perfmon asynchronous work (e.g. block thread or reset) */
14124 + if (thread_info_flags & _TIF_PERFMON_WORK)
14125 + pfm_handle_work(regs);
14126 +
14127 /* deal with pending signal delivery */
14128 if (thread_info_flags & _TIF_SIGPENDING)
14129 do_signal(regs);
14130 diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
14131 index ca316b5..6e9fa74 100644
14132 --- a/arch/x86/kernel/signal_64.c
14133 +++ b/arch/x86/kernel/signal_64.c
14134 @@ -19,6 +19,7 @@
14135 #include <linux/stddef.h>
14136 #include <linux/personality.h>
14137 #include <linux/compiler.h>
14138 +#include <linux/perfmon_kern.h>
14139 #include <asm/processor.h>
14140 #include <asm/ucontext.h>
14141 #include <asm/uaccess.h>
14142 @@ -549,12 +550,17 @@ static void do_signal(struct pt_regs *regs)
14143 void do_notify_resume(struct pt_regs *regs, void *unused,
14144 __u32 thread_info_flags)
14145 {
14146 +
14147 #ifdef CONFIG_X86_MCE
14148 /* notify userspace of pending MCEs */
14149 if (thread_info_flags & _TIF_MCE_NOTIFY)
14150 mce_notify_user();
14151 #endif /* CONFIG_X86_MCE */
14152
14153 + /* process perfmon asynchronous work (e.g. block thread or reset) */
14154 + if (thread_info_flags & _TIF_PERFMON_WORK)
14155 + pfm_handle_work(regs);
14156 +
14157 /* deal with pending signal delivery */
14158 if (thread_info_flags & _TIF_SIGPENDING)
14159 do_signal(regs);
14160 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
14161 index 7985c5b..9ddf6db 100644
14162 --- a/arch/x86/kernel/smpboot.c
14163 +++ b/arch/x86/kernel/smpboot.c
14164 @@ -42,6 +42,7 @@
14165 #include <linux/init.h>
14166 #include <linux/smp.h>
14167 #include <linux/module.h>
14168 +#include <linux/perfmon_kern.h>
14169 #include <linux/sched.h>
14170 #include <linux/percpu.h>
14171 #include <linux/bootmem.h>
14172 @@ -1382,6 +1383,7 @@ int __cpu_disable(void)
14173 remove_cpu_from_maps(cpu);
14174 unlock_vector_lock();
14175 fixup_irqs(cpu_online_map);
14176 + pfm_cpu_disable();
14177 return 0;
14178 }
14179
14180 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
14181 index d44395f..e1384a9 100644
14182 --- a/arch/x86/kernel/syscall_table_32.S
14183 +++ b/arch/x86/kernel/syscall_table_32.S
14184 @@ -332,3 +332,15 @@ ENTRY(sys_call_table)
14185 .long sys_dup3 /* 330 */
14186 .long sys_pipe2
14187 .long sys_inotify_init1
14188 + .long sys_pfm_create_context
14189 + .long sys_pfm_write_pmcs
14190 + .long sys_pfm_write_pmds /* 335 */
14191 + .long sys_pfm_read_pmds
14192 + .long sys_pfm_load_context
14193 + .long sys_pfm_start
14194 + .long sys_pfm_stop
14195 + .long sys_pfm_restart /* 340 */
14196 + .long sys_pfm_create_evtsets
14197 + .long sys_pfm_getinfo_evtsets
14198 + .long sys_pfm_delete_evtsets
14199 + .long sys_pfm_unload_context
14200 diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
14201 index 8a5f161..10faef5 100644
14202 --- a/arch/x86/oprofile/nmi_int.c
14203 +++ b/arch/x86/oprofile/nmi_int.c
14204 @@ -16,6 +16,7 @@
14205 #include <linux/moduleparam.h>
14206 #include <linux/kdebug.h>
14207 #include <linux/cpu.h>
14208 +#include <linux/perfmon_kern.h>
14209 #include <asm/nmi.h>
14210 #include <asm/msr.h>
14211 #include <asm/apic.h>
14212 @@ -217,12 +218,18 @@ static int nmi_setup(void)
14213 int err = 0;
14214 int cpu;
14215
14216 - if (!allocate_msrs())
14217 + if (pfm_session_allcpus_acquire())
14218 + return -EBUSY;
14219 +
14220 + if (!allocate_msrs()) {
14221 + pfm_session_allcpus_release();
14222 return -ENOMEM;
14223 + }
14224
14225 err = register_die_notifier(&profile_exceptions_nb);
14226 if (err) {
14227 free_msrs();
14228 + pfm_session_allcpus_release();
14229 return err;
14230 }
14231
14232 @@ -304,6 +311,7 @@ static void nmi_shutdown(void)
14233 model->shutdown(msrs);
14234 free_msrs();
14235 put_cpu_var(cpu_msrs);
14236 + pfm_session_allcpus_release();
14237 }
14238
14239 static void nmi_cpu_start(void *dummy)
14240 diff --git a/arch/x86/perfmon/Kconfig b/arch/x86/perfmon/Kconfig
14241 new file mode 100644
14242 index 0000000..08842e6
14243 --- /dev/null
14244 +++ b/arch/x86/perfmon/Kconfig
14245 @@ -0,0 +1,89 @@
14246 +menu "Hardware Performance Monitoring support"
14247 +config PERFMON
14248 + bool "Perfmon2 performance monitoring interface"
14249 + select X86_LOCAL_APIC
14250 + default n
14251 + help
14252 + Enables the perfmon2 interface to access the hardware
14253 + performance counters. See <http://perfmon2.sf.net/> for
14254 + more details.
14255 +
14256 +config PERFMON_DEBUG
14257 + bool "Perfmon debugging"
14258 + default n
14259 + depends on PERFMON
14260 + help
14261 + Enables perfmon debugging support
14262 +
14263 +config PERFMON_DEBUG_FS
14264 + bool "Enable perfmon statistics reporting via debugfs"
14265 + default y
14266 + depends on PERFMON && DEBUG_FS
14267 + help
14268 + Enable collection and reporting of perfmon timing statistics under
14269 + debugfs. This is used for debugging and performance analysis of the
14270 + subsystem. The debugfs filesystem must be mounted.
14271 +
14272 +config X86_PERFMON_P6
14273 + tristate "Support for Intel P6/Pentium M processor hardware performance counters"
14274 + depends on PERFMON && X86_32
14275 + default n
14276 + help
14277 + Enables support for Intel P6-style hardware performance counters.
14278 + To be used with Intel Pentium III, PentiumPro, Pentium M processors.
14279 +
14280 +config X86_PERFMON_P4
14281 + tristate "Support for Intel Pentium 4/Xeon hardware performance counters"
14282 + depends on PERFMON
14283 + default n
14284 + help
14285 + Enables support for Intel Pentium 4/Xeon (Netburst) hardware performance
14286 + counters.
14287 +
14288 +config X86_PERFMON_PEBS_P4
14289 + tristate "Support for Intel Netburst Precise Event-Based Sampling (PEBS)"
14290 + depends on PERFMON && X86_PERFMON_P4
14291 + default n
14292 + help
14293 + Enables support for Precise Event-Based Sampling (PEBS) on the Intel
14294 + Netburst processors such as Pentium 4, Xeon which support it.
14295 +
14296 +config X86_PERFMON_CORE
14297 + tristate "Support for Intel Core-based performance counters"
14298 + depends on PERFMON
14299 + default n
14300 + help
14301 + Enables support for Intel Core-based performance counters. Enable
14302 + this option to support Intel Core 2 processors.
14303 +
14304 +config X86_PERFMON_PEBS_CORE
14305 + tristate "Support for Intel Core Precise Event-Based Sampling (PEBS)"
14306 + depends on PERFMON && X86_PERFMON_CORE
14307 + default n
14308 + help
14309 + Enables support for Precise Event-Based Sampling (PEBS) on the Intel
14310 + Core processors.
14311 +
14312 +config X86_PERFMON_INTEL_ATOM
14313 + tristate "Support for Intel Atom processor"
14314 + depends on PERFMON
14315 + default n
14316 + help
14317 + Enables support for Intel Atom processors.
14318 +
14319 +config X86_PERFMON_INTEL_ARCH
14320 + tristate "Support for Intel architectural perfmon v1/v2"
14321 + depends on PERFMON
14322 + default n
14323 + help
14324 + Enables support for Intel architectural performance counters.
14325 + This feature was introduced with Intel Core Solo/Core Duo processors.
14326 +
14327 +config X86_PERFMON_AMD64
14328 + tristate "Support AMD Athlon64/Opteron64 hardware performance counters"
14329 + depends on PERFMON
14330 + default n
14331 + help
14332 + Enables support for Athlon64/Opteron64 hardware performance counters.
14333 + Support for family 6, 15 and 16(10H) processors.
14334 +endmenu
14335 diff --git a/arch/x86/perfmon/Makefile b/arch/x86/perfmon/Makefile
14336 new file mode 100644
14337 index 0000000..1cbed3e
14338 --- /dev/null
14339 +++ b/arch/x86/perfmon/Makefile
14340 @@ -0,0 +1,13 @@
14341 +#
14342 +# Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
14343 +# Contributed by Stephane Eranian <eranian@hpl.hp.com>
14344 +#
14345 +obj-$(CONFIG_PERFMON) += perfmon.o
14346 +obj-$(CONFIG_X86_PERFMON_P6) += perfmon_p6.o
14347 +obj-$(CONFIG_X86_PERFMON_P4) += perfmon_p4.o
14348 +obj-$(CONFIG_X86_PERFMON_CORE) += perfmon_intel_core.o
14349 +obj-$(CONFIG_X86_PERFMON_INTEL_ARCH) += perfmon_intel_arch.o
14350 +obj-$(CONFIG_X86_PERFMON_PEBS_P4) += perfmon_pebs_p4_smpl.o
14351 +obj-$(CONFIG_X86_PERFMON_PEBS_CORE) += perfmon_pebs_core_smpl.o
14352 +obj-$(CONFIG_X86_PERFMON_AMD64) += perfmon_amd64.o
14353 +obj-$(CONFIG_X86_PERFMON_INTEL_ATOM) += perfmon_intel_atom.o
14354 diff --git a/arch/x86/perfmon/perfmon.c b/arch/x86/perfmon/perfmon.c
14355 new file mode 100644
14356 index 0000000..e727fed
14357 --- /dev/null
14358 +++ b/arch/x86/perfmon/perfmon.c
14359 @@ -0,0 +1,761 @@
14360 +/*
14361 + * This file implements the X86 specific support for the perfmon2 interface
14362 + *
14363 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
14364 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
14365 + *
14366 + * Copyright (c) 2007 Advanced Micro Devices, Inc.
14367 + * Contributed by Robert Richter <robert.richter@amd.com>
14368 + *
14369 + * This program is free software; you can redistribute it and/or
14370 + * modify it under the terms of version 2 of the GNU General Public
14371 + * License as published by the Free Software Foundation.
14372 + *
14373 + * This program is distributed in the hope that it will be useful,
14374 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
14375 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14376 + * General Public License for more details.
14377 + *
14378 + * You should have received a copy of the GNU General Public License
14379 + * along with this program; if not, write to the Free Software
14380 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
14381 + * 02111-1307 USA
14382 + */
14383 +#include <linux/interrupt.h>
14384 +#include <linux/perfmon_kern.h>
14385 +#include <linux/kprobes.h>
14386 +#include <linux/kdebug.h>
14387 +#include <linux/nmi.h>
14388 +
14389 +#include <asm/apic.h>
14390 +
14391 +DEFINE_PER_CPU(unsigned long, real_iip);
14392 +DEFINE_PER_CPU(int, pfm_using_nmi);
14393 +DEFINE_PER_CPU(unsigned long, saved_lvtpc);
14394 +
14395 +/**
14396 + * pfm_arch_ctxswin_thread - thread context switch in
14397 + * @task: task switched in
14398 + * @ctx: context for the task
14399 + *
14400 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
14401 + * set cannot be NULL. Context is locked. Interrupts are masked.
14402 + *
14403 + * Caller has already restored all PMD and PMC registers, if
14404 + * necessary (i.e., lazy restore scheme).
14405 + *
14406 + * On x86, the only common code just needs to unsecure RDPMC if necessary
14407 + *
14408 + * Model-specific features, e.g., PEBS, IBS, are taken care of in the
14409 + * corresponding PMU description module
14410 + */
14411 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
14412 +{
14413 + struct pfm_arch_context *ctx_arch;
14414 +
14415 + ctx_arch = pfm_ctx_arch(ctx);
14416 +
14417 + /*
14418 + * restore saved real iip
14419 + */
14420 + if (ctx->active_set->npend_ovfls)
14421 + __get_cpu_var(real_iip) = ctx_arch->saved_real_iip;
14422 +
14423 + /*
14424 + * enable RDPMC on this CPU
14425 + */
14426 + if (ctx_arch->flags.insecure)
14427 + set_in_cr4(X86_CR4_PCE);
14428 +}
14429 +
14430 +/**
14431 + * pfm_arch_ctxswout_thread - context switch out thread
14432 + * @task: task switched out
14433 + * @ctx : context switched out
14434 + *
14435 + * Called from pfm_ctxsw(). Task is guaranteed to be current.
14436 + * Context is locked. Interrupts are masked. Monitoring may be active.
14437 + * PMU access is guaranteed. PMC and PMD registers are live in PMU.
14438 + *
14439 + * Return:
14440 + * non-zero : did not save PMDs (as part of stopping the PMU)
14441 + * 0 : saved PMDs (no need to save them in caller)
14442 + */
14443 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
14444 +{
14445 + struct pfm_arch_context *ctx_arch;
14446 + struct pfm_arch_pmu_info *pmu_info;
14447 +
14448 + ctx_arch = pfm_ctx_arch(ctx);
14449 + pmu_info = pfm_pmu_info();
14450 +
14451 + /*
14452 + * disable lazy restore of PMCS on ctxswin because
14453 + * we modify some of them.
14454 + */
14455 + ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
14456 +
14457 + if (ctx->active_set->npend_ovfls)
14458 + ctx_arch->saved_real_iip = __get_cpu_var(real_iip);
14459 +
14460 + /*
14461 + * disable RDPMC on this CPU
14462 + */
14463 + if (ctx_arch->flags.insecure)
14464 + clear_in_cr4(X86_CR4_PCE);
14465 +
14466 + if (ctx->state == PFM_CTX_MASKED)
14467 + return 1;
14468 +
14469 + return pmu_info->stop_save(ctx, ctx->active_set);
14470 +}
14471 +
14472 +/**
14473 + * pfm_arch_stop - deactivate monitoring
14474 + * @task: task to stop
14475 + * @ctx: context to stop
14476 + *
14477 + * Called from pfm_stop()
14478 + * Interrupts are masked. Context is locked. Set is the active set.
14479 + *
14480 + * For per-thread:
14481 + * task is not necessarily current. If not current task, then
14482 + * task is guaranteed stopped and off any cpu. Access to PMU
14483 + * is not guaranteed.
14484 + *
14485 + * For system-wide:
14486 + * task is current
14487 + *
14488 + * must disable active monitoring. ctx cannot be NULL
14489 + */
14490 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
14491 +{
14492 + struct pfm_arch_pmu_info *pmu_info;
14493 +
14494 + pmu_info = pfm_pmu_info();
14495 +
14496 + /*
14497 + * no need to go through stop_save()
14498 + * if we are already stopped
14499 + */
14500 + if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
14501 + return;
14502 +
14503 + if (task != current)
14504 + return;
14505 +
14506 + pmu_info->stop_save(ctx, ctx->active_set);
14507 +}
14508 +
14509 +
14510 +/**
14511 + * pfm_arch_start - activate monitoring
14512 + * @task: task to start
14513 + * @ctx: context to start
14514 + *
14515 + * Interrupts are masked. Context is locked.
14516 + *
14517 + * For per-thread:
14518 + * Task is not necessarily current. If not current task, then task
14519 + * is guaranteed stopped and off any cpu. No access to PMU if task
14520 + * is not current.
14521 + *
14522 + * For system-wide:
14523 + * task is always current
14524 + */
14525 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
14526 +{
14527 + struct pfm_event_set *set;
14528 +
14529 + set = ctx->active_set;
14530 +
14531 + if (task != current)
14532 + return;
14533 +
14534 + /*
14535 + * task is current from here on. For any other task the PMCs
14536 + * are restored when the thread is switched back in
14537 + */
14538 +
14539 + pfm_arch_restore_pmcs(ctx, set);
14540 +}
14541 +
14542 +/**
14543 + * pfm_arch_restore_pmds - reload PMD registers
14544 + * @ctx: context to restore from
14545 + * @set: current event set
14546 + *
14547 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
14548 + * pfm_context_load_sys(), pfm_ctxsw()
14549 + *
14550 + * Context is locked. Interrupts are masked. Set cannot be NULL.
14551 + * Access to the PMU is guaranteed.
14552 + */
14553 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
14554 +{
14555 + struct pfm_arch_pmu_info *pmu_info;
14556 + u16 i, num;
14557 +
14558 + pmu_info = pfm_pmu_info();
14559 +
14560 + num = set->nused_pmds;
14561 +
14562 + /*
14563 + * model-specific override
14564 + */
14565 + if (pmu_info->restore_pmds) {
14566 + pmu_info->restore_pmds(ctx, set);
14567 + return;
14568 + }
14569 +
14570 + /*
14571 + * we can restore only the PMD we use because:
14572 + *
14573 + * - can only read with pfm_read_pmds() the registers
14574 + * declared used via pfm_write_pmds(), smpl_pmds, reset_pmds
14575 + *
14576 + * - if cr4.pce=1, only counters are exposed to user. RDPMC
14577 + * does not work with other types of PMU registers. Thus, no
14578 + * address is ever exposed by counters
14579 + *
14580 + * - there is never a dependency between one pmd register and
14581 + * another
14582 + */
14583 + for (i = 0; num; i++) {
14584 + if (likely(test_bit(i, cast_ulp(set->used_pmds)))) {
14585 + pfm_write_pmd(ctx, i, set->pmds[i].value);
14586 + num--;
14587 + }
14588 + }
14589 +}
14590 +
14591 +/**
14592 + * pfm_arch_restore_pmcs - reload PMC registers
14593 + * @ctx: context to restore from
14594 + * @set: current event set
14595 + *
14596 + * function called from pfm_switch_sets(), pfm_context_load_thread(),
14597 + * pfm_context_load_sys(), pfm_ctxsw().
14598 + *
14599 + * Context is locked. Interrupts are masked. set cannot be NULL.
14600 + * Access to the PMU is guaranteed.
14601 + *
14602 + * function must restore all PMC registers from set
14603 + */
14604 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
14605 +{
14606 + struct pfm_arch_pmu_info *pmu_info;
14607 + u64 *mask;
14608 + u16 i, num;
14609 +
14610 + pmu_info = pfm_pmu_info();
14611 +
14612 + /*
14613 + * we need to restore PMCs only when:
14614 + * - context is not masked
14615 + * - monitoring activated
14616 + *
14617 + * Masking monitoring after an overflow does not change the
14618 + * value of flags.started
14619 + */
14620 + if (ctx->state == PFM_CTX_MASKED || !ctx->flags.started)
14621 + return;
14622 +
14623 + /*
14624 + * model-specific override
14625 + */
14626 + if (pmu_info->restore_pmcs) {
14627 + pmu_info->restore_pmcs(ctx, set);
14628 + return;
14629 + }
14630 + /*
14631 + * restore all pmcs
14632 + *
14633 + * It is not possible to restore only the pmcs we used because
14634 + * certain PMU models (e.g. Pentium 4) have dependencies. Thus
14635 + * we do not want one application using stale PMC coming from
14636 + * another one.
14637 + *
14638 + * On PMU models where there are no dependencies between pmcs, then
14639 + * it is possible to optimize by only restoring the registers that
14640 + * are used, and this can be done with the models-specific override
14641 + * for this function.
14642 + *
14643 + * The default code takes the safest approach, i.e., assume the worst
14644 + */
14645 + mask = ctx->regs.pmcs;
14646 + num = ctx->regs.num_pmcs;
14647 + for (i = 0; num; i++) {
14648 + if (test_bit(i, cast_ulp(mask))) {
14649 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
14650 + num--;
14651 + }
14652 + }
14653 +}
14654 +
14655 +/**
14656 + * smp_pmu_interrupt - lowest level PMU interrupt handler for X86
14657 + * @regs: machine state
14658 + *
14659 + * The PMU interrupt is handled through an interrupt gate, therefore
14660 + * the CPU automatically clears the EFLAGS.IF, i.e., masking interrupts.
14661 + *
14662 + * The perfmon interrupt handler MUST run with interrupts disabled due
14663 + * to possible race with other, higher priority interrupts, such as timer
14664 + * or IPI function calls.
14665 + *
14666 + * See description in IA-32 architecture manual, Vol 3 section 5.8.1
14667 + */
14668 +void smp_pmu_interrupt(struct pt_regs *regs)
14669 +{
14670 + struct pfm_arch_pmu_info *pmu_info;
14671 + struct pfm_context *ctx;
14672 + unsigned long iip;
14673 + int using_nmi;
14674 +
14675 + using_nmi = __get_cpu_var(pfm_using_nmi);
14676 +
14677 + ack_APIC_irq();
14678 +
14679 + irq_enter();
14680 +
14681 + /*
14682 + * when using NMI, pfm_handle_nmi() gets called
14683 + * first. It stops monitoring and records the
14684 + * iip into real_iip, then it reposts the interrupt
14685 + * using the lower priority vector LOCAL_PERFMON_VECTOR
14686 + *
14687 + * On some processors, e.g., P4, it may be that some
14688 + * state is already recorded from pfm_handle_nmi()
14689 + * and it only needs to be copied back into the normal
14690 + * fields so it can be used transparently by higher level
14691 + * code.
14692 + */
14693 + if (using_nmi) {
14694 + ctx = __get_cpu_var(pmu_ctx);
14695 + pmu_info = pfm_pmu_info();
14696 + iip = __get_cpu_var(real_iip);
14697 + if (ctx && pmu_info->nmi_copy_state)
14698 + pmu_info->nmi_copy_state(ctx);
14699 + } else
14700 + iip = instruction_pointer(regs);
14701 +
14702 + pfm_interrupt_handler(iip, regs);
14703 +
14704 + /*
14705 + * On Intel P6, Pentium M, P4, Intel Core:
14706 + * - it is necessary to clear the MASK field for the LVTPC
14707 + * vector. Otherwise interrupts remain masked. See
14708 + * section 8.5.1
14709 + * AMD X86-64:
14710 + * - the documentation does not stipulate the behavior.
14711 + * To be safe, we also rewrite the vector to clear the
14712 + * mask field
14713 + */
14714 + if (!using_nmi && current_cpu_data.x86_vendor == X86_VENDOR_INTEL)
14715 + apic_write(APIC_LVTPC, LOCAL_PERFMON_VECTOR);
14716 +
14717 + irq_exit();
14718 +}
14719 +
14720 +/**
14721 + * pfm_handle_nmi - PMU NMI handler notifier callback
14722 + * @nb: notifier block
14723 + * @val: type of die notifier
14724 + * @data: die notifier-specific data
14725 + *
14726 + * called from notify_die() notifier from a trap handler path. We only
14727 + * care about NMI related callbacks, and ignore everything else.
14728 + *
14729 + * Cannot grab any locks, including the perfmon context lock
14730 + *
14731 + * Must detect if NMI interrupt comes from perfmon, and if so it must
14732 + * stop the PMU and repost a lower-priority interrupt. The perfmon interrupt
14733 + * handler needs to grab the context lock, thus it cannot be run directly
14734 + * from the NMI interrupt call path.
14735 + */
14736 +static int __kprobes pfm_handle_nmi(struct notifier_block *nb,
14737 + unsigned long val,
14738 + void *data)
14739 +{
14740 + struct die_args *args = data;
14741 + struct pfm_context *ctx;
14742 + struct pfm_arch_pmu_info *pmu_info;
14743 +
14744 + /*
14745 + * only NMI related calls
14746 + */
14747 + if (val != DIE_NMI_IPI)
14748 + return NOTIFY_DONE;
14749 +
14750 + /*
14751 + * perfmon not using NMI
14752 + */
14753 + if (!__get_cpu_var(pfm_using_nmi))
14754 + return NOTIFY_DONE;
14755 +
14756 + /*
14757 + * No context
14758 + */
14759 + ctx = __get_cpu_var(pmu_ctx);
14760 + if (!ctx) {
14761 + PFM_DBG_ovfl("no ctx");
14762 + return NOTIFY_DONE;
14763 + }
14764 +
14765 + /*
14766 + * Detect if we have overflows, i.e., NMI interrupt
14767 + * caused by PMU
14768 + */
14769 + pmu_info = pfm_pmu_conf->pmu_info;
14770 + if (!pmu_info->has_ovfls(ctx)) {
14771 + PFM_DBG_ovfl("no ovfl");
14772 + return NOTIFY_DONE;
14773 + }
14774 +
14775 + /*
14776 + * we stop the PMU to avoid further overflow before this
14777 + * one is treated by lower priority interrupt handler
14778 + */
14779 + pmu_info->quiesce();
14780 +
14781 + /*
14782 + * record actual instruction pointer
14783 + */
14784 + __get_cpu_var(real_iip) = instruction_pointer(args->regs);
14785 +
14786 + /*
14787 + * post lower priority interrupt (LOCAL_PERFMON_VECTOR)
14788 + */
14789 + pfm_arch_resend_irq(ctx);
14790 +
14791 + pfm_stats_inc(ovfl_intr_nmi_count);
14792 +
14793 + /*
14794 + * we need to rewrite the APIC vector on Intel
14795 + */
14796 + if (current_cpu_data.x86_vendor == X86_VENDOR_INTEL)
14797 + apic_write(APIC_LVTPC, APIC_DM_NMI);
14798 +
14799 + /*
14800 + * the notification was for us
14801 + */
14802 + return NOTIFY_STOP;
14803 +}
14804 +
14805 +static struct notifier_block pfm_nmi_nb = {
14806 + .notifier_call = pfm_handle_nmi
14807 +};
14808 +
14809 +/**
14810 + * pfm_arch_get_pmu_module_name - get PMU description module name for autoload
14811 + *
14812 + * called from pfm_pmu_request_module
14813 + */
14814 +char *pfm_arch_get_pmu_module_name(void)
14815 +{
14816 + switch (current_cpu_data.x86) {
14817 + case 6:
14818 + switch (current_cpu_data.x86_model) {
14819 + case 3: /* Pentium II */
14820 + case 7 ... 11:
14821 + case 13:
14822 + return "perfmon_p6";
14823 + case 15: /* Merom */
14824 + case 23: /* Penryn */
14825 + return "perfmon_intel_core";
14826 + case 28: /* Atom/Silverthorne */
14827 + return "perfmon_intel_atom";
14828 + case 29: /* Dunnington */
14829 + return "perfmon_intel_core";
14830 + default:
14831 + goto try_arch;
14832 + }
14833 + case 15:
14834 + case 16:
14835 + /* All Opteron processors */
14836 + if (current_cpu_data.x86_vendor == X86_VENDOR_AMD)
14837 + return "perfmon_amd64";
14838 +
14839 + switch (current_cpu_data.x86_model) {
14840 + case 0 ... 6:
14841 + return "perfmon_p4";
14842 + }
14843 + /* FALL THROUGH */
14844 + default:
14845 +try_arch:
14846 + if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
14847 + return "perfmon_intel_arch";
14848 + return NULL;
14849 + }
14850 + return NULL;
14851 +}
14852 +
14853 +/**
14854 + * pfm_arch_resend_irq - post perfmon interrupt on regular vector
14855 + *
14856 + * called from pfm_ctxswin_thread() and pfm_handle_nmi()
14857 + */
14858 +void pfm_arch_resend_irq(struct pfm_context *ctx)
14859 +{
14860 + unsigned long val, dest;
14861 + /*
14862 + * we cannot use hw_resend_irq() because it goes to
14863 + * the I/O APIC. We need to go to the Local APIC.
14864 + *
14865 + * The "int vec" is not the right solution either
14866 + * because it triggers a software intr. We need
14867 + * to regenerate the interrupt and have it pended
14868 + * until we unmask interrupts.
14869 + *
14870 + * Instead we send ourself an IPI on the perfmon
14871 + * vector.
14872 + */
14873 + val = APIC_DEST_SELF|APIC_INT_ASSERT|
14874 + APIC_DM_FIXED|LOCAL_PERFMON_VECTOR;
14875 +
14876 + dest = apic_read(APIC_ID);
14877 + apic_write(APIC_ICR2, dest);
14878 + apic_write(APIC_ICR, val);
14879 +}
14880 +
14881 +/**
14882 + * pfm_arch_pmu_acquire_percpu - setup APIC per CPU
14883 + * @data: contains pmu flags
14884 + */
14885 +static void pfm_arch_pmu_acquire_percpu(void *data)
14886 +{
14887 +
14888 + struct pfm_arch_pmu_info *pmu_info;
14889 + unsigned int tmp, vec;
14890 + unsigned long flags = (unsigned long)data;
14891 + unsigned long lvtpc;
14892 +
14893 + pmu_info = pfm_pmu_conf->pmu_info;
14894 +
14895 + /*
14896 + * we only reprogram the LVTPC vector if we have detected
14897 + * no sharing, otherwise it means the APIC is already programmed
14898 + * and we use whatever vector (likely NMI) is there
14899 + */
14900 + if (!(flags & PFM_X86_FL_SHARING)) {
14901 + if (flags & PFM_X86_FL_USE_NMI)
14902 + vec = APIC_DM_NMI;
14903 + else
14904 + vec = LOCAL_PERFMON_VECTOR;
14905 +
14906 + tmp = apic_read(APIC_LVTERR);
14907 + apic_write(APIC_LVTERR, tmp | APIC_LVT_MASKED);
14908 + apic_write(APIC_LVTPC, vec);
14909 + apic_write(APIC_LVTERR, tmp);
14910 + }
14911 + lvtpc = (unsigned long)apic_read(APIC_LVTPC);
14912 +
14913 + __get_cpu_var(pfm_using_nmi) = lvtpc == APIC_DM_NMI;
14914 +
14915 + PFM_DBG("LTVPC=0x%lx using_nmi=%d", lvtpc, __get_cpu_var(pfm_using_nmi));
14916 +
14917 + /*
14918 + * invoke model specific acquire routine. May be used for
14919 + * model-specific initializations
14920 + */
14921 + if (pmu_info->acquire_pmu_percpu)
14922 + pmu_info->acquire_pmu_percpu();
14923 +}
14924 +
14925 +/**
14926 + * pfm_arch_pmu_acquire - acquire PMU resource from system
14927 + * @unavail_pmcs : bitmask to use to set unavailable pmcs
14928 + * @unavail_pmds : bitmask to use to set unavailable pmds
14929 + *
14930 + * interrupts are not masked
14931 + *
14932 + * Grab PMU registers from lower level MSR allocator, then program the
14933 + * APIC on every CPU with either LOCAL_PERFMON_VECTOR or NMI.
14934 + *
14935 + * return: 0 on success, -EBUSY when PMCs are taken and sharing is unsupported
14936 + */
14937 +int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
14938 +{
14939 + struct pfm_arch_pmu_info *pmu_info;
14940 + struct pfm_regmap_desc *d;
14941 + u16 i, nlost;
14942 +
14943 + pmu_info = pfm_pmu_conf->pmu_info;
14944 + pmu_info->flags &= ~PFM_X86_FL_SHARING;
14945 +
14946 + nlost = 0;
14947 +
14948 + d = pfm_pmu_conf->pmc_desc;
14949 + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
14950 + if (!(d->type & PFM_REG_I))
14951 + continue;
14952 +
14953 + if (d->type & PFM_REG_V)
14954 + continue;
14955 + /*
14956 + * reserve register with lower-level allocator
14957 + */
14958 + if (!reserve_evntsel_nmi(d->hw_addr)) {
14959 + PFM_DBG("pmc%d(%s) already used", i, d->desc);
14960 + __set_bit(i, cast_ulp(unavail_pmcs));
14961 + nlost++;
14962 + continue;
14963 + }
14964 + }
14965 + PFM_DBG("nlost=%d info_flags=0x%x\n", nlost, pmu_info->flags);
14966 + /*
14967 + * some PMU models (e.g., P6) do not support sharing
14968 + * so check if we found less than the expected number of PMC registers
14969 + */
14970 + if (nlost) {
14971 + if (pmu_info->flags & PFM_X86_FL_NO_SHARING) {
14972 + PFM_INFO("PMU already used by another subsystem, "
14973 + "PMU does not support sharing, "
14974 + "try disabling Oprofile or "
14975 + "reboot with nmi_watchdog=0");
14976 + goto undo;
14977 + }
14978 + pmu_info->flags |= PFM_X86_FL_SHARING;
14979 + }
14980 +
14981 + d = pfm_pmu_conf->pmd_desc;
14982 + for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) {
14983 + if (!(d->type & PFM_REG_I))
14984 + continue;
14985 +
14986 + if (d->type & PFM_REG_V)
14987 + continue;
14988 +
14989 + if (!reserve_perfctr_nmi(d->hw_addr)) {
14990 + PFM_DBG("pmd%d(%s) already used", i, d->desc);
14991 + __set_bit(i, cast_ulp(unavail_pmds));
14992 + }
14993 + }
14994 + /*
14995 + * program APIC on each CPU
14996 + */
14997 + on_each_cpu(pfm_arch_pmu_acquire_percpu,
14998 + (void *)(unsigned long)pmu_info->flags , 1);
14999 +
15000 + return 0;
15001 +undo:
15002 + /*
15003 + * undo pmc reservations: release only what the loop above reserved
15004 + */
15005 + d = pfm_pmu_conf->pmc_desc;
15006 + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
15007 + if (!(d->type & PFM_REG_I) || (d->type & PFM_REG_V))
15008 + continue;
15009 + if (!test_bit(i, cast_ulp(unavail_pmcs)))
15010 + release_evntsel_nmi(d->hw_addr);
15011 + }
15012 + return -EBUSY;
15013 +}
15014 +/**
15015 + * pfm_arch_pmu_release_percpu - clear NMI state for one CPU
15016 + *
15017 + */
15018 +static void pfm_arch_pmu_release_percpu(void *data)
15019 +{
15020 + struct pfm_arch_pmu_info *pmu_info;
15021 +
15022 + pmu_info = pfm_pmu_conf->pmu_info;
15023 +
15024 + __get_cpu_var(pfm_using_nmi) = 0;
15025 +
15026 + /*
15027 + * invoke model specific release routine.
15028 + * May be used to undo certain initializations
15029 + * or free some model-specific resources.
15030 + */
15031 + if (pmu_info->release_pmu_percpu)
15032 + pmu_info->release_pmu_percpu();
15033 +}
15034 +
15035 +/**
15036 + * pfm_arch_pmu_release - release PMU resource to system
15037 + *
15038 + * called from pfm_pmu_release()
15039 + * interrupts are not masked
15040 + *
15041 + * On x86, we return the PMU registers to the MSR allocator
15042 + */
15043 +void pfm_arch_pmu_release(void)
15044 +{
15045 + struct pfm_regmap_desc *d;
15046 + u16 i, n;
15047 +
15048 + d = pfm_pmu_conf->pmc_desc;
15049 + n = pfm_pmu_conf->regs_all.num_pmcs;
15050 + for (i = 0; n; i++, d++) {
15051 + if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15052 + continue;
15053 + release_evntsel_nmi(d->hw_addr);
15054 + n--;
15055 + PFM_DBG("pmc%u released", i);
15056 + }
15057 + d = pfm_pmu_conf->pmd_desc;
15058 + n = pfm_pmu_conf->regs_all.num_pmds;
15059 + for (i = 0; n; i++, d++) {
15060 + if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmds)))
15061 + continue;
15062 + release_perfctr_nmi(d->hw_addr);
15063 + n--;
15064 + PFM_DBG("pmd%u released", i);
15065 + }
15066 +
15067 + /* clear NMI variable if used */
15068 + if (__get_cpu_var(pfm_using_nmi))
15069 + on_each_cpu(pfm_arch_pmu_release_percpu, NULL , 1);
15070 +}
15071 +
15072 +/**
15073 + * pfm_arch_pmu_config_init - validate PMU description structure
15074 + * @cfg: PMU description structure
15075 + *
15076 + * return:
15077 + * 0 if valid
15078 + * errno otherwise
15079 + *
15080 + * called from pfm_pmu_register()
15081 + */
15082 +int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
15083 +{
15084 + struct pfm_arch_pmu_info *pmu_info;
15085 +
15086 + pmu_info = pfm_pmu_info();
15087 + if (!pmu_info) {
15088 + PFM_DBG("%s missing pmu_info", cfg->pmu_name);
15089 + return -EINVAL;
15090 + }
15091 + if (!pmu_info->has_ovfls) {
15092 + PFM_DBG("%s missing has_ovfls callback", cfg->pmu_name);
15093 + return -EINVAL;
15094 + }
15095 + if (!pmu_info->quiesce) {
15096 + PFM_DBG("%s missing quiesce callback", cfg->pmu_name);
15097 + return -EINVAL;
15098 + }
15099 + if (!pmu_info->stop_save) {
15100 + PFM_DBG("%s missing stop_save callback", cfg->pmu_name);
15101 + return -EINVAL;
15102 + }
15103 + return 0;
15104 +}
15105 +
15106 +/**
15107 + * pfm_arch_init - one time global arch-specific initialization
15108 + *
15109 + * called from pfm_init()
15110 + */
15111 +int __init pfm_arch_init(void)
15112 +{
15113 + /*
15114 + * we need to register our NMI handler when the kernel boots
15115 + * to avoid a deadlock condition with the NMI watchdog or Oprofile
15116 + * if we were to try and register/unregister on-demand.
15117 + */
15118 + register_die_notifier(&pfm_nmi_nb);
15119 + return 0;
15120 +}
15121 diff --git a/arch/x86/perfmon/perfmon_amd64.c b/arch/x86/perfmon/perfmon_amd64.c
15122 new file mode 100644
15123 index 0000000..f9b5f9c
15124 --- /dev/null
15125 +++ b/arch/x86/perfmon/perfmon_amd64.c
15126 @@ -0,0 +1,754 @@
15127 +/*
15128 + * This file contains the PMU description for the Athlon64 and Opteron64
15129 + * processors. It supports 32 and 64-bit modes.
15130 + *
15131 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
15132 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
15133 + *
15134 + * Copyright (c) 2007 Advanced Micro Devices, Inc.
15135 + * Contributed by Robert Richter <robert.richter@amd.com>
15136 + *
15137 + * This program is free software; you can redistribute it and/or
15138 + * modify it under the terms of version 2 of the GNU General Public
15139 + * License as published by the Free Software Foundation.
15140 + *
15141 + * This program is distributed in the hope that it will be useful,
15142 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15143 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15144 + * General Public License for more details.
15145 + *
15146 + * You should have received a copy of the GNU General Public License
15147 + * along with this program; if not, write to the Free Software
15148 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
15149 + * 02111-1307 USA
15150 + */
15151 +#include <linux/module.h>
15152 +#include <linux/vmalloc.h>
15153 +#include <linux/topology.h>
15154 +#include <linux/kprobes.h>
15155 +#include <linux/pci.h>
15156 +#include <linux/perfmon_kern.h>
15157 +#include <asm/hw_irq.h>
15158 +#include <asm/apic.h>
15159 +
15160 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
15161 +MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
15162 +MODULE_DESCRIPTION("AMD64 PMU description table");
15163 +MODULE_LICENSE("GPL");
15164 +
15165 +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203
15166 +
15167 +static int force_nmi;
15168 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
15169 +module_param(force_nmi, bool, 0600);
15170 +
15171 +#define HAS_IBS 0x01 /* has IBS support */
15172 +
15173 +static u8 ibs_eilvt_off, ibs_status; /* AMD: extended interrupt LVT offset */
15174 +
15175 +static void pfm_amd64_restore_pmcs(struct pfm_context *ctx,
15176 + struct pfm_event_set *set);
15177 +static void __kprobes pfm_amd64_quiesce(void);
15178 +static int pfm_amd64_has_ovfls(struct pfm_context *ctx);
15179 +static int pfm_amd64_stop_save(struct pfm_context *ctx,
15180 + struct pfm_event_set *set);
15181 +
15182 +#define IBSFETCHCTL_PMC 4 /* pmc4 */
15183 +#define IBSFETCHCTL_PMD 4 /* pmd4 */
15184 +#define IBSOPSCTL_PMC 5 /* pmc5 */
15185 +#define IBSOPSCTL_PMD 7 /* pmd7 */
15186 +
15187 +static u64 enable_mask[PFM_MAX_PMCS];
15188 +static u16 max_enable;
15189 +
15190 +static struct pfm_arch_pmu_info pfm_amd64_pmu_info = {
15191 + .stop_save = pfm_amd64_stop_save,
15192 + .has_ovfls = pfm_amd64_has_ovfls,
15193 + .quiesce = pfm_amd64_quiesce,
15194 + .restore_pmcs = pfm_amd64_restore_pmcs
15195 +};
15196 +
15197 +#define PFM_AMD64_IBSFETCHVAL (1ULL<<49) /* valid fetch sample */
15198 +#define PFM_AMD64_IBSFETCHEN (1ULL<<48) /* fetch sampling enabled */
15199 +#define PFM_AMD64_IBSOPVAL (1ULL<<18) /* valid execution sample */
15200 +#define PFM_AMD64_IBSOPEN (1ULL<<17) /* execution sampling enabled */
15201 +
15202 +/*
15203 + * force Local APIC interrupt on overflow
15204 + */
15205 +#define PFM_K8_VAL (1ULL<<20)
15206 +#define PFM_K8_NO64 (1ULL<<20)
15207 +
15208 +/*
15209 + * reserved bits must be 1
15210 + *
15211 + * for family 15:
15212 + * - upper 32 bits are reserved
15213 + * - bit 20, bit 21
15214 + *
15215 + * for family 16:
15216 + * - bits 36-39 are reserved
15217 + * - bits 42-63 are reserved
15218 + * - bit 20, bit 21
15219 + *
15220 + * for IBS registers:
15221 + * IBSFETCHCTL: all bits are reserved except bits 57, 48, 15:0
15222 + * IBSOPSCTL : all bits are reserved except bits 17, 15:0
15223 + */
15224 +#define PFM_K8_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21))
15225 +#define PFM_16_RSVD ((0x3fffffULL<<42) | (0xfULL<<36) | (1ULL<<20) | (1ULL<<21))
15226 +#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<48)|(1ULL<<57)|0xffffULL))
15227 +#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<17)|0xffffULL))
15228 +
15229 +static struct pfm_regmap_desc pfm_amd64_pmc_desc[] = {
15230 +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
15231 +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
15232 +/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
15233 +/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
15234 +/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", 0, PFM_AMD64_IBSFETCHCTL_RSVD, 0, MSR_AMD64_IBSFETCHCTL),
15235 +/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", 0, PFM_AMD64_IBSOPCTL_RSVD, 0, MSR_AMD64_IBSOPCTL),
15236 +};
15237 +#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
15238 +
15239 +#define PFM_REG_IBS (PFM_REG_I|PFM_REG_INTR)
15240 +
15241 +/*
15242 + * AMD64 counters are 48 bits, upper bits are reserved
15243 + */
15244 +#define PFM_AMD64_CTR_RSVD (~((1ULL<<48)-1))
15245 +
15246 +#define PFM_AMD_D(n) \
15247 + { .type = PFM_REG_C, \
15248 + .desc = "PERFCTR"#n, \
15249 + .hw_addr = MSR_K7_PERFCTR0+n, \
15250 + .rsvd_msk = PFM_AMD64_CTR_RSVD, \
15251 + .dep_pmcs[0] = 1ULL << n \
15252 + }
15253 +
15254 +#define PFM_AMD_IBSO(t, s, a) \
15255 + { .type = t, \
15256 + .desc = s, \
15257 + .hw_addr = a, \
15258 + .rsvd_msk = 0, \
15259 + .dep_pmcs[0] = 1ULL << 5 \
15260 + }
15261 +/* IBS fetch PMDs (pmd4-pmd6) depend on the IBSFETCHCTL control register, pmc4 */
15262 +#define PFM_AMD_IBSF(t, s, a) \
15263 + { .type = t, \
15264 + .desc = s, \
15265 + .hw_addr = a, \
15266 + .rsvd_msk = 0, \
15267 + .dep_pmcs[0] = 1ULL << IBSFETCHCTL_PMC \
15268 + }
15269 +
15270 +static struct pfm_regmap_desc pfm_amd64_pmd_desc[] = {
15271 +/* pmd0 */ PFM_AMD_D(0),
15272 +/* pmd1 */ PFM_AMD_D(1),
15273 +/* pmd2 */ PFM_AMD_D(2),
15274 +/* pmd3 */ PFM_AMD_D(3),
15275 +/* pmd4 */ PFM_AMD_IBSF(PFM_REG_IBS, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
15276 +/* pmd5 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
15277 +/* pmd6 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
15278 +/* pmd7 */ PFM_AMD_IBSO(PFM_REG_IBS, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
15279 +/* pmd8 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
15280 +/* pmd9 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
15281 +/* pmd10 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
15282 +/* pmd11 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
15283 +/* pmd12 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
15284 +/* pmd13 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
15285 +};
15286 +#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
15287 +
15288 +static struct pfm_context **pfm_nb_sys_owners;
15289 +static struct pfm_context *pfm_nb_task_owner;
15290 +
15291 +static struct pfm_pmu_config pfm_amd64_pmu_conf;
15292 +
15293 +#define is_ibs_pmc(x) (x == 4 || x == 5)
15294 +
15295 +static void pfm_amd64_setup_eilvt_per_cpu(void *info)
15296 +{
15297 + u8 lvt_off;
15298 +
15299 + /* program the IBS vector to the perfmon vector */
15300 + lvt_off = setup_APIC_eilvt_ibs(LOCAL_PERFMON_VECTOR,
15301 + APIC_EILVT_MSG_FIX, 0);
15302 + PFM_DBG("APIC_EILVT%d set to 0x%x", lvt_off, LOCAL_PERFMON_VECTOR);
15303 + ibs_eilvt_off = lvt_off;
15304 +}
15305 +/* write the IBS LVT offset to each NB misc PCI device; 0 on success, 1 on failure */
15306 +static int pfm_amd64_setup_eilvt(void)
15307 +{
15308 +#define IBSCTL_LVTOFFSETVAL (1 << 8)
15309 +#define IBSCTL 0x1cc
15310 + struct pci_dev *cpu_cfg;
15311 + int nodes;
15312 + u32 value = 0;
15313 +
15314 + /* per CPU setup: route the IBS EILVT entry to the perfmon vector */
15315 + on_each_cpu(pfm_amd64_setup_eilvt_per_cpu, NULL, 1);
15316 +
15317 + nodes = 0;
15318 + cpu_cfg = NULL;
15319 + do {
15320 + cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
15321 + PCI_DEVICE_ID_AMD_10H_NB_MISC,
15322 + cpu_cfg);
15323 + if (!cpu_cfg)
15324 + break;
15325 + ++nodes;
15326 + pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
15327 + | IBSCTL_LVTOFFSETVAL);
15328 + pci_read_config_dword(cpu_cfg, IBSCTL, &value);
15329 + if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
15330 + PFM_DBG("Failed to setup IBS LVT offset, IBSCTL = 0x%08x", value);
15331 + pci_dev_put(cpu_cfg); /* drop the reference pci_get_device() took */
15332 + return 1;
15333 + }
15334 + } while (1);
15335 +
15336 + if (!nodes) {
15337 + PFM_DBG("No CPU node configured for IBS");
15338 + return 1;
15339 + }
15340 +
15341 +#ifdef CONFIG_NUMA
15342 + /* Sanity check */
15343 + /* Works only for 64bit with proper numa implementation. */
15344 + if (nodes != num_possible_nodes()) {
15345 + PFM_DBG("Failed to setup CPU node(s) for IBS, "
15346 + "found: %d, expected %d",
15347 + nodes, num_possible_nodes());
15348 + return 1;
15349 + }
15350 +#endif
15351 + return 0;
15352 +}
15353 +
15354 +/*
15355 + * There can only be one user per socket for the Northbridge (NB) events,
15356 + * so we enforce mutual exclusion as follows:
15357 + * - per-thread : only one context machine-wide can use NB events
15358 + * - system-wide: only one context per processor socket
15359 + *
15360 + * Exclusion is enforced at:
15361 + * - pfm_load_context()
15362 + * - pfm_write_pmcs() for attached contexts
15363 + *
15364 + * Exclusion is released at:
15365 + * - pfm_unload_context() or any calls that implicitly use it
15366 + *
15367 + * return:
15368 + * 0 : successfully acquired NB access
15369 + * < 0: errno, failed to acquire NB access
15370 + */
15371 +static int pfm_amd64_acquire_nb(struct pfm_context *ctx)
15372 +{
15373 + struct pfm_context **entry, *old;
15374 + int proc_id;
15375 +
15376 +#ifdef CONFIG_SMP
15377 + proc_id = cpu_data(smp_processor_id()).phys_proc_id;
15378 +#else
15379 + proc_id = 0;
15380 +#endif
15381 +
15382 + if (ctx->flags.system)
15383 + entry = &pfm_nb_sys_owners[proc_id];
15384 + else
15385 + entry = &pfm_nb_task_owner;
15386 +
15387 + old = cmpxchg(entry, NULL, ctx);
15388 + if (!old) {
15389 + if (ctx->flags.system)
15390 + PFM_DBG("acquired Northbridge event access on socket %u", proc_id);
15391 + else
15392 + PFM_DBG("acquired Northbridge event access globally");
15393 + } else if (old != ctx) {
15394 + if (ctx->flags.system)
15395 + PFM_DBG("NorthBridge event conflict on socket %u", proc_id);
15396 + else
15397 + PFM_DBG("global NorthBridge event conflict");
15398 + return -EBUSY;
15399 + }
15400 + return 0;
15401 +}
15402 +
15403 +/*
15404 + * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL, i.e.,
15405 + * when we have detected a multi-core processor.
15406 + *
15407 + * context is locked, interrupts are masked
15408 + */
15409 +static int pfm_amd64_pmc_write_check(struct pfm_context *ctx,
15410 + struct pfm_event_set *set,
15411 + struct pfarg_pmc *req)
15412 +{
15413 + unsigned int event;
15414 +
15415 + /*
15416 + * delay checking NB event until we load the context
15417 + */
15418 + if (ctx->state == PFM_CTX_UNLOADED)
15419 + return 0;
15420 +
15421 + /*
15422 + * check event is NB event
15423 + */
15424 + event = (unsigned int)(req->reg_value & 0xff);
15425 + if (event < 0xee)
15426 + return 0;
15427 +
15428 + return pfm_amd64_acquire_nb(ctx);
15429 +}
15430 +
15431 +/*
15432 + * invoked on pfm_load_context().
15433 + * context is locked, interrupts are masked
15434 + */
15435 +static int pfm_amd64_load_context(struct pfm_context *ctx)
15436 +{
15437 + struct pfm_event_set *set;
15438 + unsigned int i, n;
15439 +
15440 + /*
15441 + * scan all sets for NB events
15442 + */
15443 + list_for_each_entry(set, &ctx->set_list, list) {
15444 + n = set->nused_pmcs;
15445 + for (i = 0; n; i++) {
15446 + if (!test_bit(i, cast_ulp(set->used_pmcs)))
15447 + continue;
15448 +
15449 + if (!is_ibs_pmc(i) && (set->pmcs[i] & 0xff) >= 0xee)
15450 + goto found;
15451 + n--;
15452 + }
15453 + }
15454 + return 0;
15455 +found:
15456 + return pfm_amd64_acquire_nb(ctx);
15457 +}
15458 +
15459 +/*
15460 + * invoked on pfm_unload_context()
15461 + */
15462 +static void pfm_amd64_unload_context(struct pfm_context *ctx)
15463 +{
15464 + struct pfm_context **entry, *old;
15465 + int proc_id;
15466 +
15467 +#ifdef CONFIG_SMP
15468 + proc_id = cpu_data(smp_processor_id()).phys_proc_id;
15469 +#else
15470 + proc_id = 0;
15471 +#endif
15472 +
15473 + /*
15474 + * unload always happens on the monitored CPU in system-wide
15475 + */
15476 + if (ctx->flags.system)
15477 + entry = &pfm_nb_sys_owners[proc_id];
15478 + else
15479 + entry = &pfm_nb_task_owner;
15480 +
15481 + old = cmpxchg(entry, ctx, NULL);
15482 + if (old == ctx) {
15483 + if (ctx->flags.system)
15484 + PFM_DBG("released NorthBridge on socket %u", proc_id);
15485 + else
15486 + PFM_DBG("released NorthBridge events globally");
15487 + }
15488 +}
15489 +
15490 +/*
15491 + * detect if we need to activate NorthBridge event access control
15492 + */
15493 +static int pfm_amd64_setup_nb_event_control(void)
15494 +{
15495 + unsigned int c, n = 0;
15496 + unsigned int max_phys = 0;
15497 +
15498 +#ifdef CONFIG_SMP
15499 + for_each_possible_cpu(c) {
15500 + if (cpu_data(c).phys_proc_id > max_phys)
15501 + max_phys = cpu_data(c).phys_proc_id;
15502 + }
15503 +#else
15504 + max_phys = 0;
15505 +#endif
15506 + if (max_phys > 255) {
15507 + PFM_INFO("socket id %d is too big to handle", max_phys);
15508 + return -ENOMEM;
15509 + }
15510 +
15511 + n = max_phys + 1;
15512 + if (n < 2)
15513 + return 0;
15514 +
15515 + pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners));
15516 + if (!pfm_nb_sys_owners)
15517 + return -ENOMEM;
15518 +
15519 + memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners));
15520 + pfm_nb_task_owner = NULL;
15521 +
15522 + /*
15523 + * activate write-checker for PMC registers
15524 + */
15525 + for (c = 0; c < PFM_AMD_NUM_PMCS; c++) {
15526 + if (!is_ibs_pmc(c))
15527 + pfm_amd64_pmc_desc[c].type |= PFM_REG_WC;
15528 + }
15529 +
15530 + pfm_amd64_pmu_info.load_context = pfm_amd64_load_context;
15531 + pfm_amd64_pmu_info.unload_context = pfm_amd64_unload_context;
15532 +
15533 + pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check;
15534 +
15535 + PFM_INFO("NorthBridge event access control enabled");
15536 +
15537 + return 0;
15538 +}
15539 +
15540 +/*
15541 + * disable registers which are not available on
15542 + * the host (applies to IBS registers)
15543 + */
15544 +static void pfm_amd64_check_registers(void)
15545 +{
15546 + u16 i;
15547 +
15548 + PFM_DBG("has_ibs=%d", !!(ibs_status & HAS_IBS));
15549 +
15550 + __set_bit(0, cast_ulp(enable_mask));
15551 + __set_bit(1, cast_ulp(enable_mask));
15552 + __set_bit(2, cast_ulp(enable_mask));
15553 + __set_bit(3, cast_ulp(enable_mask));
15554 + max_enable = 3+1;
15555 +
15556 +
15557 + /*
15558 + * remove IBS registers if feature not present
15559 + */
15560 + if (!(ibs_status & HAS_IBS)) {
15561 + pfm_amd64_pmc_desc[4].type = PFM_REG_NA;
15562 + pfm_amd64_pmc_desc[5].type = PFM_REG_NA;
15563 + for (i = 4; i < 14; i++)
15564 + pfm_amd64_pmd_desc[i].type = PFM_REG_NA;
15565 + } else {
15566 + __set_bit(16, cast_ulp(enable_mask));
15567 + __set_bit(17, cast_ulp(enable_mask));
15568 + max_enable = 17 + 1;
15569 + }
15570 +
15571 + /*
15572 + * adjust reserved bit fields for family 16
15573 + */
15574 + if (current_cpu_data.x86 == 16) {
15575 + for (i = 0; i < PFM_AMD_NUM_PMCS; i++)
15576 + if (pfm_amd64_pmc_desc[i].rsvd_msk == PFM_K8_RSVD)
15577 + pfm_amd64_pmc_desc[i].rsvd_msk = PFM_16_RSVD;
15578 + }
15579 +}
15580 +
15581 +static int pfm_amd64_probe_pmu(void)
15582 +{
15583 + u64 val = 0;
15584 + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
15585 + PFM_INFO("not an AMD processor");
15586 + return -1;
15587 + }
15588 +
15589 + switch (current_cpu_data.x86) {
15590 + case 16:
15591 + case 15:
15592 + case 6:
15593 + break;
15594 + default:
15595 + PFM_INFO("unsupported family=%d", current_cpu_data.x86);
15596 + return -1;
15597 + }
15598 +
15599 + /* check for IBS */
15600 + if (cpu_has(&current_cpu_data, X86_FEATURE_IBS)) {
15601 + ibs_status |= HAS_IBS;
15602 + rdmsrl(MSR_AMD64_IBSCTL, val);
15603 + }
15604 +
15605 + PFM_INFO("found family=%d IBSCTL=0x%llx", current_cpu_data.x86, (unsigned long long)val);
15606 +
15607 + /*
15608 + * check for local APIC (required)
15609 + */
15610 + if (!cpu_has_apic) {
15611 + PFM_INFO("no local APIC, unsupported");
15612 + return -1;
15613 + }
15614 +
15615 + if (current_cpu_data.x86_max_cores > 1
15616 + && pfm_amd64_setup_nb_event_control())
15617 + return -1;
15618 +
15619 + if (force_nmi)
15620 + pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
15621 +
15622 + if (ibs_status & HAS_IBS) {
15623 + /* Setup extended interrupt */
15624 + if (pfm_amd64_setup_eilvt()) {
15625 + PFM_INFO("Failed to initialize extended interrupts "
15626 + "for IBS");
15627 + ibs_status &= ~HAS_IBS;
15628 + PFM_INFO("Unable to use IBS");
15629 + } else {
15630 + PFM_INFO("IBS supported");
15631 + }
15632 + }
15633 +
15634 + pfm_amd64_check_registers();
15635 +
15636 + return 0;
15637 +}
15638 +
15639 +/*
15640 + * detect if counters have overflowed.
15641 + * return:
15642 + * 0 : no overflow
15643 + * 1 : at least one overflow
15644 + */
15645 +static int __kprobes pfm_amd64_has_ovfls(struct pfm_context *ctx)
15646 +{
15647 + struct pfm_regmap_desc *xrd;
15648 + u64 *cnt_mask;
15649 + u64 wmask, val;
15650 + u16 i, num;
15651 +
15652 + /*
15653 + * Check for IBS events
15654 + */
15655 + if (ibs_status & HAS_IBS) {
15656 + rdmsrl(MSR_AMD64_IBSFETCHCTL, val);
15657 + if (val & PFM_AMD64_IBSFETCHVAL)
15658 + return 1;
15659 + rdmsrl(MSR_AMD64_IBSOPCTL, val);
15660 + if (val & PFM_AMD64_IBSOPVAL)
15661 + return 1;
15662 + }
15663 + /*
15664 + * Check regular counters
15665 + */
15666 + cnt_mask = ctx->regs.cnt_pmds;
15667 + num = ctx->regs.num_counters;
15668 + wmask = 1ULL << pfm_pmu_conf->counter_width;
15669 + xrd = pfm_amd64_pmd_desc;
15670 +
15671 + for (i = 0; num; i++) {
15672 + if (test_bit(i, cast_ulp(cnt_mask))) {
15673 + rdmsrl(xrd[i].hw_addr, val);
15674 + if (!(val & wmask))
15675 + return 1;
15676 + num--;
15677 + }
15678 + }
15679 + return 0;
15680 +}
15681 +
15682 +/*
15683 + * Must check for IBS event BEFORE stop_save_p6 because
15684 + * stopping monitoring does destroy IBS state information
15685 + * in IBSFETCHCTL/IBSOPCTL because they are tagged as enable
15686 + * registers.
15687 + */
15688 +static int pfm_amd64_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
15689 +{
15690 + struct pfm_arch_pmu_info *pmu_info;
15691 + u64 used_mask[PFM_PMC_BV];
15692 + u64 *cnt_pmds;
15693 + u64 val, wmask, ovfl_mask;
15694 + u32 i, count, use_ibs;
15695 +
15696 + pmu_info = pfm_pmu_info();
15697 +
15698 + /*
15699 + * IBS used if:
15700 + * - on family 10h processor with IBS
15701 + * - at least one of the IBS PMD registers is used
15702 + */
15703 + use_ibs = (ibs_status & HAS_IBS)
15704 + && (test_bit(IBSFETCHCTL_PMD, cast_ulp(set->used_pmds))
15705 + || test_bit(IBSOPSCTL_PMD, cast_ulp(set->used_pmds)));
15706 +
15707 + wmask = 1ULL << pfm_pmu_conf->counter_width;
15708 +
15709 + bitmap_and(cast_ulp(used_mask),
15710 + cast_ulp(set->used_pmcs),
15711 + cast_ulp(enable_mask),
15712 + max_enable);
15713 +
15714 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
15715 +
15716 + /*
15717 + * stop monitoring
15718 + * Unfortunately, this is very expensive!
15719 + * wrmsrl() is serializing.
15720 + *
15721 + * With IBS, we need to do read-modify-write to preserve the content
15722 + * for OpsCTL and FetchCTL because they are also used as PMDs and saved
15723 + * below
15724 + */
15725 + if (use_ibs) {
15726 + for (i = 0; count; i++) {
15727 + if (test_bit(i, cast_ulp(used_mask))) {
15728 + if (i == IBSFETCHCTL_PMC) {
15729 + rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
15730 + val &= ~PFM_AMD64_IBSFETCHEN;
15731 + } else if (i == IBSOPSCTL_PMC) {
15732 + rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
15733 + val &= ~PFM_AMD64_IBSOPEN;
15734 + } else
15735 + val = 0;
15736 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
15737 + count--;
15738 + }
15739 + }
15740 + } else {
15741 + for (i = 0; count; i++) {
15742 + if (test_bit(i, cast_ulp(used_mask))) {
15743 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
15744 + count--;
15745 + }
15746 + }
15747 + }
15748 +
15749 + /*
15750 + * if we already have a pending overflow condition, we simply
15751 + * return to take care of this first.
15752 + */
15753 + if (set->npend_ovfls)
15754 + return 1;
15755 +
15756 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
15757 + cnt_pmds = ctx->regs.cnt_pmds;
15758 +
15759 + /*
15760 + * check for pending overflows and save PMDs (combo)
15761 + * we employ used_pmds because we also need to save
15762 + * and not just check for pending interrupts.
15763 + *
15764 + * Must check for counting PMDs because of virtual PMDs and IBS
15765 + */
15766 + count = set->nused_pmds;
15767 + for (i = 0; count; i++) {
15768 + if (test_bit(i, cast_ulp(set->used_pmds))) {
15769 + val = pfm_arch_read_pmd(ctx, i);
15770 + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
15771 + if (!(val & wmask)) {
15772 + __set_bit(i, cast_ulp(set->povfl_pmds));
15773 + set->npend_ovfls++;
15774 + }
15775 + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask);
15776 + }
15777 + set->pmds[i].value = val;
15778 + count--;
15779 + }
15780 + }
15781 +
15782 + /*
15783 + * check if IBS contains valid data, and mark the corresponding
15784 + * PMD as overflowed
15785 + */
15786 + if (use_ibs) {
15787 + if (set->pmds[IBSFETCHCTL_PMD].value & PFM_AMD64_IBSFETCHVAL) {
15788 + __set_bit(IBSFETCHCTL_PMD, cast_ulp(set->povfl_pmds));
15789 + set->npend_ovfls++;
15790 + }
15791 + if (set->pmds[IBSOPSCTL_PMD].value & PFM_AMD64_IBSOPVAL) {
15792 + __set_bit(IBSOPSCTL_PMD, cast_ulp(set->povfl_pmds));
15793 + set->npend_ovfls++;
15794 + }
15795 + }
15796 + /* 0 means: no need to save PMDs at upper level */
15797 + return 0;
15798 +}
15799 +
15800 +/**
15801 + * pfm_amd64_quiesce - stop monitoring without grabbing any lock
15802 + *
15803 + * called from NMI interrupt handler to immediately stop monitoring
15804 + * cannot grab any lock, including perfmon related locks
15805 + */
15806 +static void __kprobes pfm_amd64_quiesce(void)
15807 +{
15808 + /*
15809 + * quiesce PMU by clearing available registers that have
15810 + * the start/stop capability
15811 + */
15812 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15813 + wrmsrl(MSR_K7_EVNTSEL0, 0);
15814 + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15815 + wrmsrl(MSR_K7_EVNTSEL0+1, 0);
15816 + if (test_bit(2, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15817 + wrmsrl(MSR_K7_EVNTSEL0+2, 0);
15818 + if (test_bit(3, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15819 + wrmsrl(MSR_K7_EVNTSEL0+3, 0);
15820 +
15821 + if (test_bit(4, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15822 + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
15823 + if (test_bit(5, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
15824 + wrmsrl(MSR_AMD64_IBSOPCTL, 0);
15825 +}
15826 +
15827 +/**
15828 + * pfm_amd64_restore_pmcs - reload PMC registers
15829 + * @ctx: context to restore from
15830 + * @set: current event set
15831 + *
15832 + * optimized version of pfm_arch_restore_pmcs(). On AMD64, we can
15833 + * afford to only restore the pmcs registers we use, because they are
15834 + * all independent from each other.
15835 + */
15836 +static void pfm_amd64_restore_pmcs(struct pfm_context *ctx,
15837 + struct pfm_event_set *set)
15838 +{
15839 + u64 *mask;
15840 + u16 i, num;
15841 +
15842 + mask = set->used_pmcs;
15843 + num = set->nused_pmcs;
15844 + for (i = 0; num; i++) {
15845 + if (test_bit(i, cast_ulp(mask))) {
15846 + wrmsrl(pfm_amd64_pmc_desc[i].hw_addr, set->pmcs[i]);
15847 + num--;
15848 + }
15849 + }
15850 +}
15851 +
15852 +static struct pfm_pmu_config pfm_amd64_pmu_conf = {
15853 + .pmu_name = "AMD64",
15854 + .counter_width = 47,
15855 + .pmd_desc = pfm_amd64_pmd_desc,
15856 + .pmc_desc = pfm_amd64_pmc_desc,
15857 + .num_pmc_entries = PFM_AMD_NUM_PMCS,
15858 + .num_pmd_entries = PFM_AMD_NUM_PMDS,
15859 + .probe_pmu = pfm_amd64_probe_pmu,
15860 + .version = "1.2",
15861 + .pmu_info = &pfm_amd64_pmu_info,
15862 + .flags = PFM_PMU_BUILTIN_FLAG,
15863 + .owner = THIS_MODULE,
15864 +};
15865 +
15866 +static int __init pfm_amd64_pmu_init_module(void)
15867 +{
15868 + return pfm_pmu_register(&pfm_amd64_pmu_conf);
15869 +}
15870 +
15871 +static void __exit pfm_amd64_pmu_cleanup_module(void)
15872 +{
15873 + /* unregister first so the PMU is gone before its owner table is freed */
15874 + pfm_pmu_unregister(&pfm_amd64_pmu_conf);
15875 +
15876 + vfree(pfm_nb_sys_owners); /* vfree(NULL) is a no-op */
15877 +}
15878 +
15879 +module_init(pfm_amd64_pmu_init_module);
15880 +module_exit(pfm_amd64_pmu_cleanup_module);
15881 diff --git a/arch/x86/perfmon/perfmon_intel_arch.c b/arch/x86/perfmon/perfmon_intel_arch.c
15882 new file mode 100644
15883 index 0000000..e27a732
15884 --- /dev/null
15885 +++ b/arch/x86/perfmon/perfmon_intel_arch.c
15886 @@ -0,0 +1,610 @@
15887 +/*
15888 + * This file contains the Intel architectural perfmon v1, v2, v3
15889 + * description tables.
15890 + *
15891 + * Architectural perfmon was introduced with Intel Core Solo/Duo
15892 + * processors.
15893 + *
15894 + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
15895 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
15896 + *
15897 + * This program is free software; you can redistribute it and/or
15898 + * modify it under the terms of version 2 of the GNU General Public
15899 + * License as published by the Free Software Foundation.
15900 + *
15901 + * This program is distributed in the hope that it will be useful,
15902 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15903 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15904 + * General Public License for more details.
15905 + *
15906 + * You should have received a copy of the GNU General Public License
15907 + * along with this program; if not, write to the Free Software
15908 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
15909 + * 02111-1307 USA
15910 + */
15911 +#include <linux/module.h>
15912 +#include <linux/kprobes.h>
15913 +#include <linux/perfmon_kern.h>
15914 +#include <linux/nmi.h>
15915 +#include <asm/msr.h>
15916 +#include <asm/apic.h>
15917 +
15918 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
15919 +MODULE_DESCRIPTION("Intel architectural perfmon v1");
15920 +MODULE_LICENSE("GPL");
15921 +
15922 +static int force, force_nmi;
15923 +MODULE_PARM_DESC(force, "bool: force module to load succesfully");
15924 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
15925 +module_param(force, bool, 0600);
15926 +module_param(force_nmi, bool, 0600);
15927 +
15928 +static u64 enable_mask[PFM_MAX_PMCS];
15929 +static u16 max_enable;
15930 +
15931 +/*
15932 + * - upper 32 bits are reserved
15933 + * - INT: APIC enable bit is reserved (forced to 1)
15934 + * - bit 21 is reserved
15935 + *
15936 + * RSVD: reserved bits are 1
15937 + */
15938 +#define PFM_IA_PMC_RSVD ((~((1ULL<<32)-1)) \
15939 + | (1ULL<<20) \
15940 + | (1ULL<<21))
15941 +
15942 +/*
15943 + * force Local APIC interrupt on overflow
15944 + * disable with NO_EMUL64
15945 + */
15946 +#define PFM_IA_PMC_VAL (1ULL<<20)
15947 +#define PFM_IA_NO64 (1ULL<<20)
15948 +
15949 +/*
15950 + * architecture specifies that:
15951 + * IA32_PMCx MSR : starts at 0x0c1 & occupy a contiguous block of MSR
15952 + * IA32_PERFEVTSELx MSR : starts at 0x186 & occupy a contiguous block of MSR
15953 + * MSR_GEN_FIXED_CTR0 : starts at 0x309 & occupy a contiguous block of MSR
15954 + */
15955 +#define MSR_GEN_SEL_BASE MSR_P6_EVNTSEL0
15956 +#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0
15957 +#define MSR_GEN_FIXED_PMC_BASE MSR_CORE_PERF_FIXED_CTR0
15958 +
15959 +/*
15960 + * layout of EAX for CPUID.0xa leaf function
15961 + */
15962 +struct pmu_eax {
15963 + unsigned int version:8; /* architectural perfmon version */
15964 + unsigned int num_cnt:8; /* number of generic counters */
15965 + unsigned int cnt_width:8; /* width of generic counters */
15966 + unsigned int ebx_length:8; /* number of architected events */
15967 +};
15968 +
15969 +/*
15970 + * layout of EDX for CPUID.0xa leaf function when perfmon v2 is detected
15971 + */
15972 +struct pmu_edx {
15973 + unsigned int num_cnt:5; /* number of fixed counters */
15974 + unsigned int cnt_width:8; /* width of fixed counters */
15975 + unsigned int reserved:19;
15976 +};
15977 +
15978 +static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx,
15979 + struct pfm_event_set *set);
15980 +static int pfm_intel_arch_stop_save(struct pfm_context *ctx,
15981 + struct pfm_event_set *set);
15982 +static int pfm_intel_arch_has_ovfls(struct pfm_context *ctx);
15983 +static void __kprobes pfm_intel_arch_quiesce(void);
15984 +
15985 +/*
15986 + * physical addresses of MSR controlling the perfevtsel and counter registers
15987 + */
15988 +struct pfm_arch_pmu_info pfm_intel_arch_pmu_info = {
15989 + .stop_save = pfm_intel_arch_stop_save,
15990 + .has_ovfls = pfm_intel_arch_has_ovfls,
15991 + .quiesce = pfm_intel_arch_quiesce,
15992 + .restore_pmcs = pfm_intel_arch_restore_pmcs
15993 +};
15994 +
15995 +#define PFM_IA_C(n) { \
15996 + .type = PFM_REG_I64, \
15997 + .desc = "PERFEVTSEL"#n, \
15998 + .dfl_val = PFM_IA_PMC_VAL, \
15999 + .rsvd_msk = PFM_IA_PMC_RSVD, \
16000 + .no_emul64_msk = PFM_IA_NO64, \
16001 + .hw_addr = MSR_GEN_SEL_BASE+(n) \
16002 + }
16003 +
16004 +#define PFM_IA_D(n) \
16005 + { .type = PFM_REG_C, \
16006 + .desc = "PMC"#n, \
16007 + .hw_addr = MSR_P6_PERFCTR0+n, \
16008 + .dep_pmcs[0] = 1ULL << n \
16009 + }
16010 +
16011 +#define PFM_IA_FD(n) \
16012 + { .type = PFM_REG_C, \
16013 + .desc = "FIXED_CTR"#n, \
16014 + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\
16015 + .dep_pmcs[0] = 1ULL << 16 \
16016 + }
16017 +
16018 +static struct pfm_regmap_desc pfm_intel_arch_pmc_desc[] = {
16019 +/* pmc0 */ PFM_IA_C(0), PFM_IA_C(1), PFM_IA_C(2), PFM_IA_C(3),
16020 +/* pmc4 */ PFM_IA_C(4), PFM_IA_C(5), PFM_IA_C(6), PFM_IA_C(7),
16021 +/* pmc8 */ PFM_IA_C(8), PFM_IA_C(9), PFM_IA_C(10), PFM_IA_C(11),
16022 +/* pmc12 */ PFM_IA_C(12), PFM_IA_C(13), PFM_IA_C(14), PFM_IA_C(15),
16023 +
16024 +/* pmc16 */ { .type = PFM_REG_I,
16025 + .desc = "FIXED_CTRL",
16026 + .dfl_val = 0x8888888888888888ULL, /* force PMI */
16027 + .rsvd_msk = 0, /* set dynamically */
16028 + .no_emul64_msk = 0,
16029 + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
16030 + },
16031 +};
16032 +#define PFM_IA_MAX_PMCS ARRAY_SIZE(pfm_intel_arch_pmc_desc)
16033 +
16034 +static struct pfm_regmap_desc pfm_intel_arch_pmd_desc[] = {
16035 +/* pmd0 */ PFM_IA_D(0), PFM_IA_D(1), PFM_IA_D(2), PFM_IA_D(3),
16036 +/* pmd4 */ PFM_IA_D(4), PFM_IA_D(5), PFM_IA_D(6), PFM_IA_D(7),
16037 +/* pmd8 */ PFM_IA_D(8), PFM_IA_D(9), PFM_IA_D(10), PFM_IA_D(11),
16038 +/* pmd12 */ PFM_IA_D(12), PFM_IA_D(13), PFM_IA_D(14), PFM_IA_D(15),
16039 +/* fixed counters: pmd(PFM_IA_FCNT_BASE + n) maps to FIXED_CTRn */
16040 +/* pmd16 */ PFM_IA_FD(0), PFM_IA_FD(1), PFM_IA_FD(2), PFM_IA_FD(3),
16041 +/* pmd20 */ PFM_IA_FD(4), PFM_IA_FD(5), PFM_IA_FD(6), PFM_IA_FD(7),
16042 +/* pmd24 */ PFM_IA_FD(8), PFM_IA_FD(9), PFM_IA_FD(10), PFM_IA_FD(11),
16043 +/* pmd28 */ PFM_IA_FD(12), PFM_IA_FD(13), PFM_IA_FD(14), PFM_IA_FD(15)
16044 +};
16045 +#define PFM_IA_MAX_PMDS ARRAY_SIZE(pfm_intel_arch_pmd_desc)
16046 +
16047 +#define PFM_IA_MAX_CNT 16 /* # generic counters in mapping table */
16048 +#define PFM_IA_MAX_FCNT 16 /* # of fixed counters in mapping table */
16049 +#define PFM_IA_FCNT_BASE 16 /* base index of fixed counters PMD */
16050 +
16051 +static struct pfm_pmu_config pfm_intel_arch_pmu_conf;
16052 +
16053 +static void pfm_intel_arch_check_errata(void)
16054 +{
16055 + /*
16056 + * Core Duo errata AE49 (no fix). Both counters share a single
16057 + * enable bit in PERFEVTSEL0
16058 + */
16059 + if (current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 14)
16060 + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_NO_SHARING;
16061 +}
16062 +
16063 +static inline void set_enable_mask(unsigned int i)
16064 +{
16065 + __set_bit(i, cast_ulp(enable_mask));
16066 +
16067 + /* max_enable = highest + 1 */
16068 + if ((i+1) > max_enable)
16069 + max_enable = i+ 1;
16070 +}
16071 +
16072 +static void pfm_intel_arch_setup_generic(unsigned int version,
16073 + unsigned int width,
16074 + unsigned int count)
16075 +{
16076 + u64 rsvd;
16077 + unsigned int i;
16078 +
16079 + /*
16080 + * first we handle the generic counters:
16081 + *
16082 + * - ensure HW does not have more registers than hardcoded in the tables
16083 + * - adjust rsvd_msk to actual counter width
16084 + * - initialize enable_mask (list of PMC with start/stop capability)
16085 + * - mark unused hardcoded generic counters as unimplemented
16086 + */
16087 +
16088 + /*
16089 + * clamp only when HW reports more counters than the tables describe
16090 + */
16091 + if (count > PFM_IA_MAX_CNT) {
16092 + printk(KERN_INFO "perfmon: Limiting number of generic counters"
16093 + " to %u, HW supports %u\n",
16094 + PFM_IA_MAX_CNT, count);
16095 + count = PFM_IA_MAX_CNT;
16096 + }
16097 +
16098 + /*
16099 + * adjust rsvd_msk for generic counters based on actual width
16100 + * initialize enable_mask (one bit per generic PMC)
16101 + */
16102 + rsvd = ~((1ULL << width)-1);
16103 + for (i = 0; i < count; i++) {
16104 + pfm_intel_arch_pmd_desc[i].rsvd_msk = rsvd;
16105 + set_enable_mask(i);
16106 + }
16107 +
16108 + /*
16109 + * handle version 3 new anythread bit (21)
16110 + */
16111 + if (version == 3) {
16112 + for (i = 0; i < count; i++)
16113 + pfm_intel_arch_pmc_desc[i].rsvd_msk &= ~(1ULL << 21);
16114 + }
16115 +
16116 +
16117 + /*
16118 + * mark unused generic counters as not available
16119 + */
16120 + for (i = count ; i < PFM_IA_MAX_CNT; i++) {
16121 + pfm_intel_arch_pmd_desc[i].type = PFM_REG_NA;
16122 + pfm_intel_arch_pmc_desc[i].type = PFM_REG_NA;
16123 + }
16124 +}
16125 +
16126 +static void pfm_intel_arch_setup_fixed(unsigned int version,
16127 + unsigned int width,
16128 + unsigned int count)
16129 +{
16130 + u64 rsvd, dfl;
16131 + unsigned int i;
16132 +
16133 + /*
16134 + * handle the fixed counters (if any):
16135 + *
16136 + * - ensure HW does not have more registers than hardcoded in the tables
16137 + * - adjust rsvd_msk to actual counter width
16138 + * - initialize enable_mask (list of PMC with start/stop capability)
16139 + * - mark unused hardcoded fixed counters as unimplemented
16140 + */
16141 + if (count > PFM_IA_MAX_FCNT) {
16142 + printk(KERN_INFO "perfmon: Limiting number of fixed counters"
16143 + " to %u, HW supports %u\n",
16144 + PFM_IA_MAX_FCNT, count);
16145 + count = PFM_IA_MAX_FCNT;
16146 + }
16147 + /*
16148 + * adjust rsvd_msk for fixed counters based on actual width
16149 + */
16150 + rsvd = ~((1ULL << width)-1);
16151 + for (i = 0; i < count; i++)
16152 + pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].rsvd_msk = rsvd;
16153 +
16154 + /*
16155 + * handle version 3 new anythread bit (bit 2 of each 4-bit field)
16156 + */
16157 + if (version == 3)
16158 + rsvd = 1ULL << 3;
16159 + else
16160 + rsvd = 3ULL << 2;
16161 +
16162 + pfm_intel_arch_pmc_desc[16].rsvd_msk = 0;
16163 + for (i = 0; i < count; i++)
16164 + pfm_intel_arch_pmc_desc[16].rsvd_msk |= rsvd << (i<<2);
16165 +
16166 + /*
16167 + * mark unused fixed counters as unimplemented
16168 + *
16169 + * update the rsvd_msk, dfl_val in FIXED_CTRL:
16170 + * - rsvd_msk: set all 4 bits
16171 + * - dfl_val : clear all 4 bits
16172 + */
16173 + dfl = pfm_intel_arch_pmc_desc[16].dfl_val;
16174 + rsvd = pfm_intel_arch_pmc_desc[16].rsvd_msk;
16175 +
16176 + for (i = count ; i < PFM_IA_MAX_FCNT; i++) {
16177 + pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].type = PFM_REG_NA;
16178 + rsvd |= 0xfULL << (i<<2);
16179 + dfl &= ~(0xfULL << (i<<2));
16180 + }
16181 +
16182 + /*
16183 + * FIXED_CTR_CTRL unavailable when no fixed counters are defined
16184 + */
16185 + if (!count) {
16186 + pfm_intel_arch_pmc_desc[16].type = PFM_REG_NA;
16187 + } else {
16188 + /* update rsvd_mask and dfl_val */
16189 + pfm_intel_arch_pmc_desc[16].rsvd_msk = rsvd;
16190 + pfm_intel_arch_pmc_desc[16].dfl_val = dfl;
16191 + set_enable_mask(16);
16192 + }
16193 +}
16194 +
16195 +static int pfm_intel_arch_probe_pmu(void)
16196 +{
16197 + union {
16198 + unsigned int val;
16199 + struct pmu_eax eax;
16200 + struct pmu_edx edx;
16201 + } eax, edx;
16202 + unsigned int ebx, ecx;
16203 + unsigned int width = 0;
16204 +
16205 + edx.val = 0;
16206 +
16207 + if (!(cpu_has_arch_perfmon || force)) {
16208 + PFM_INFO("no support for Intel architectural PMU");
16209 + return -1;
16210 + }
16211 +
16212 + if (!cpu_has_apic) {
16213 + PFM_INFO("no Local APIC, try rebooting with lapic option");
16214 + return -1;
16215 + }
16216 +
16217 + /* cpuid() call protected by cpu_has_arch_perfmon */
16218 + cpuid(0xa, &eax.val, &ebx, &ecx, &edx.val);
16219 +
16220 + /*
16221 + * reject processors supported by perfmon_intel_core
16222 + *
16223 + * We need to do this explicitly to avoid depending
16224 + * on the link order in case the modules are compiled as
16225 + * builtin.
16226 + *
16227 + * non Intel processors are rejected by cpu_has_arch_perfmon
16228 + */
16229 + if (current_cpu_data.x86 == 6 && !force) {
16230 + switch (current_cpu_data.x86_model) {
16231 + case 15: /* Merom: use perfmon_intel_core */
16232 + case 23: /* Penryn: use perfmon_intel_core */
16233 + return -1;
16234 + default:
16235 + break;
16236 + }
16237 + }
16238 +
16239 + /*
16240 + * some 6/15 models have buggy BIOS
16241 + */
16242 + if (eax.eax.version == 0
16243 + && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) {
16244 + PFM_INFO("buggy v2 BIOS, adjusting for 2 generic counters");
16245 + eax.eax.version = 2;
16246 + eax.eax.num_cnt = 2;
16247 + eax.eax.cnt_width = 40;
16248 + }
16249 +
16250 + /*
16251 + * Intel Atom processors have a buggy firmware which does not report
16252 + * the correct number of fixed counters
16253 + */
16254 + if (eax.eax.version == 3 && edx.edx.num_cnt < 3
16255 + && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 28) {
16256 + PFM_INFO("buggy v3 BIOS, adjusting for 3 fixed counters");
16257 + edx.edx.num_cnt = 3;
16258 + }
16259 +
16260 + /*
16261 + * some v2 BIOSes are incomplete
16262 + */
16263 + if (eax.eax.version == 2 && !edx.edx.num_cnt) {
16264 + PFM_INFO("buggy v2 BIOS, adjusting for 3 fixed counters");
16265 + edx.edx.num_cnt = 3;
16266 + edx.edx.cnt_width = 40;
16267 + }
16268 +
16269 + /*
16270 + * no fixed counters on earlier versions
16271 + */
16272 + if (eax.eax.version < 2) {
16273 + edx.val = 0;
16274 + } else {
16275 + /*
16276 + * use the min value of both widths until we support
16277 + * variable width counters
16278 + */
16279 + width = eax.eax.cnt_width < edx.edx.cnt_width ?
16280 + eax.eax.cnt_width : edx.edx.cnt_width;
16281 + }
16282 +
16283 + PFM_INFO("detected architecural perfmon v%d", eax.eax.version);
16284 + PFM_INFO("num_gen=%d width=%d num_fixed=%d width=%d",
16285 + eax.eax.num_cnt,
16286 + eax.eax.cnt_width,
16287 + edx.edx.num_cnt,
16288 + edx.edx.cnt_width);
16289 +
16290 +
16291 + pfm_intel_arch_setup_generic(eax.eax.version,
16292 + width,
16293 + eax.eax.num_cnt);
16294 +
16295 + pfm_intel_arch_setup_fixed(eax.eax.version,
16296 + width,
16297 + edx.edx.num_cnt);
16298 +
16299 + if (force_nmi)
16300 + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_USE_NMI;
16301 +
16302 + pfm_intel_arch_check_errata();
16303 +
16304 + return 0;
16305 +}
16306 +
16307 +/**
16308 + * pfm_intel_arch_has_ovfls - check for pending overflow condition
16309 + * @ctx: context to work on
16310 + *
16311 + * detect if counters have overflowed.
16312 + * return:
16313 + * 0 : no overflow
16314 + * 1 : at least one overflow
16315 + */
16316 +static int __kprobes pfm_intel_arch_has_ovfls(struct pfm_context *ctx)
16317 +{
16318 + u64 *cnt_mask;
16319 + u64 wmask, val;
16320 + u16 i, num;
16321 +
16322 + cnt_mask = ctx->regs.cnt_pmds;
16323 + num = ctx->regs.num_counters;
16324 + wmask = 1ULL << pfm_pmu_conf->counter_width;
16325 +
16326 + /*
16327 + * we can leverage the fact that we know the mapping
16328 + * to hardcode the MSR address and avoid accessing
16329 + * more cachelines
16330 + *
16331 + * We need to check cnt_mask because not all registers
16332 + * may be available.
16333 + */
16334 + for (i = 0; num; i++) {
16335 + if (test_bit(i, cast_ulp(cnt_mask))) {
16336 + rdmsrl(pfm_intel_arch_pmd_desc[i].hw_addr, val);
16337 + if (!(val & wmask))
16338 + return 1;
16339 + num--;
16340 + }
16341 + }
16342 + return 0;
16343 +}
16344 +
16345 +static int pfm_intel_arch_stop_save(struct pfm_context *ctx,
16346 + struct pfm_event_set *set)
16347 +{
16348 + u64 used_mask[PFM_PMC_BV];
16349 + u64 *cnt_pmds;
16350 + u64 val, wmask, ovfl_mask;
16351 + u32 i, count;
16352 +
16353 + wmask = 1ULL << pfm_pmu_conf->counter_width;
16354 +
16355 + bitmap_and(cast_ulp(used_mask),
16356 + cast_ulp(set->used_pmcs),
16357 + cast_ulp(enable_mask),
16358 + max_enable);
16359 +
16360 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
16361 +
16362 + /*
16363 + * stop monitoring
16364 + * Unfortunately, this is very expensive!
16365 + * wrmsrl() is serializing.
16366 + */
16367 + for (i = 0; count; i++) {
16368 + if (test_bit(i, cast_ulp(used_mask))) {
16369 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
16370 + count--;
16371 + }
16372 + }
16373 +
16374 + /*
16375 + * if we already have a pending overflow condition, we simply
16376 + * return to take care of this first.
16377 + */
16378 + if (set->npend_ovfls)
16379 + return 1;
16380 +
16381 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
16382 + cnt_pmds = ctx->regs.cnt_pmds;
16383 +
16384 + /*
16385 + * check for pending overflows and save PMDs (combo)
16386 + * we employ used_pmds because we also need to save
16387 + * and not just check for pending interrupts.
16388 + *
16389 + * Must check for counting PMDs because of virtual PMDs
16390 + */
16391 + count = set->nused_pmds;
16392 + for (i = 0; count; i++) {
16393 + if (test_bit(i, cast_ulp(set->used_pmds))) {
16394 + val = pfm_arch_read_pmd(ctx, i);
16395 + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
16396 + if (!(val & wmask)) {
16397 + __set_bit(i, cast_ulp(set->povfl_pmds));
16398 + set->npend_ovfls++;
16399 + }
16400 + val = (set->pmds[i].value & ~ovfl_mask)
16401 + | (val & ovfl_mask);
16402 + }
16403 + set->pmds[i].value = val;
16404 + count--;
16405 + }
16406 + }
16407 + /* 0 means: no need to save PMDs at upper level */
16408 + return 0;
16409 +}
16410 +
16411 +/**
16412 + * pfm_intel_arch_quiesce - stop monitoring without grabbing any lock
16413 + *
16414 + * called from NMI interrupt handler to immediately stop monitoring
16415 + * cannot grab any lock, including perfmon related locks
16416 + */
16417 +static void __kprobes pfm_intel_arch_quiesce(void)
16418 +{
16419 + u16 i;
16420 +
16421 + /*
16422 + * PMC16 is the fixed-counter control register so it has a
16423 + * distinct MSR address
16424 + *
16425 + * We do not use the hw_addr field in the table to avoid touching
16426 + * too many cachelines
16427 + */
16428 + for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) {
16429 + if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) {
16430 + if (i == 16)
16431 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
16432 + else
16433 + wrmsrl(MSR_P6_EVNTSEL0+i, 0);
16434 + }
16435 + }
16436 +}
16437 +
16438 +/**
16439 + * pfm_intel_arch_restore_pmcs - reload PMC registers
16440 + * @ctx: context to restore from
16441 + * @set: current event set
16442 + *
16443 + * optimized version of pfm_arch_restore_pmcs(). On architectural perfmon,
16444 + * we can afford to only restore the pmcs registers we use, because they
16445 + * are all independent from each other.
16446 + */
16447 +static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx,
16448 + struct pfm_event_set *set)
16449 +{
16450 + u64 *mask;
16451 + u16 i, num;
16452 +
16453 + mask = set->used_pmcs;
16454 + num = set->nused_pmcs;
16455 + for (i = 0; num; i++) {
16456 + if (test_bit(i, cast_ulp(mask))) {
16457 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]);
16458 + num--;
16459 + }
16460 + }
16461 +}
16462 +/*
16463 + * Counters may have model-specific width. Yet the documentation says
16464 + * that only the lower 32 bits can be written to due to the specification
16465 + * of wrmsr. bits [32-(w-1)] are sign extensions of bit 31. Bits [w-63] must
16466 + * not be set (see rsvd_msk for PMDs). As such the effective width of a
16467 + * counter is 31 bits only regardless of what CPUID.0xa returns.
16468 + *
16469 + * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18
16470 + */
16471 +static struct pfm_pmu_config pfm_intel_arch_pmu_conf = {
16472 + .pmu_name = "Intel architectural",
16473 + .pmd_desc = pfm_intel_arch_pmd_desc,
16474 + .counter_width = 31,
16475 + .num_pmc_entries = PFM_IA_MAX_PMCS,
16476 + .num_pmd_entries = PFM_IA_MAX_PMDS,
16477 + .pmc_desc = pfm_intel_arch_pmc_desc,
16478 + .probe_pmu = pfm_intel_arch_probe_pmu,
16479 + .version = "1.0",
16480 + .flags = PFM_PMU_BUILTIN_FLAG,
16481 + .owner = THIS_MODULE,
16482 + .pmu_info = &pfm_intel_arch_pmu_info
16483 +};
16484 +
16485 +static int __init pfm_intel_arch_pmu_init_module(void)
16486 +{
16487 + return pfm_pmu_register(&pfm_intel_arch_pmu_conf);
16488 +}
16489 +
16490 +static void __exit pfm_intel_arch_pmu_cleanup_module(void)
16491 +{
16492 + pfm_pmu_unregister(&pfm_intel_arch_pmu_conf);
16493 +}
16494 +
16495 +module_init(pfm_intel_arch_pmu_init_module);
16496 +module_exit(pfm_intel_arch_pmu_cleanup_module);
16497 diff --git a/arch/x86/perfmon/perfmon_intel_atom.c b/arch/x86/perfmon/perfmon_intel_atom.c
16498 new file mode 100644
16499 index 0000000..9b94863
16500 --- /dev/null
16501 +++ b/arch/x86/perfmon/perfmon_intel_atom.c
16502 @@ -0,0 +1,541 @@
16503 +/*
16504 + * perfmon support for Intel Atom (architectural perfmon v3 + PEBS)
16505 + *
16506 + * Copyright (c) 2008 Google,Inc
16507 + * Contributed by Stephane Eranian <eranian@gmail.com>
16508 + *
16509 + * This program is free software; you can redistribute it and/or
16510 + * modify it under the terms of version 2 of the GNU General Public
16511 + * License as published by the Free Software Foundation.
16512 + *
16513 + * This program is distributed in the hope that it will be useful,
16514 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16515 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16516 + * General Public License for more details.
16517 + *
16518 + * You should have received a copy of the GNU General Public License
16519 + * along with this program; if not, write to the Free Software
16520 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
16521 + * 02111-1307 USA
16522 + */
16523 +#include <linux/module.h>
16524 +#include <linux/kprobes.h>
16525 +#include <linux/perfmon_kern.h>
16526 +#include <asm/msr.h>
16527 +
16528 +MODULE_AUTHOR("Stephane Eranian <eranian@gmail.com>");
16529 +MODULE_DESCRIPTION("Intel Atom");
16530 +MODULE_LICENSE("GPL");
16531 +
16532 +static int force, force_nmi;
16533 +MODULE_PARM_DESC(force, "bool: force module to load succesfully");
16534 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
16535 +module_param(force, bool, 0600);
16536 +module_param(force_nmi, bool, 0600);
16537 +
16538 +/*
16539 + * - upper 32 bits are reserved
16540 + * - INT: APIC enable bit is reserved (forced to 1)
16541 + *
16542 + * RSVD: reserved bits are 1
16543 + */
16544 +#define PFM_ATOM_PMC_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20))
16545 +
16546 +/*
16547 + * force Local APIC interrupt on overflow
16548 + * disable with NO_EMUL64
16549 + */
16550 +#define PFM_ATOM_PMC_VAL (1ULL<<20)
16551 +#define PFM_ATOM_NO64 (1ULL<<20)
16552 +
16553 +/*
16554 + * Atom counters are 40 bits wide. All 40 bits can be read but only 31 can be
16555 + * written due to a limitation of wrmsr. Bits [63-32] are sign extensions of
16556 + * bit 31. Bits [63-40] must not be set.
16557 + *
16558 + * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18
16559 + */
16560 +#define PFM_ATOM_PMD_WIDTH 31
16561 +#define PFM_ATOM_PMD_RSVD ~((1ULL << 40)-1)
16562 +
16563 +static void pfm_intel_atom_acquire_pmu_percpu(void);
16564 +static void pfm_intel_atom_release_pmu_percpu(void);
16565 +static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx,
16566 + struct pfm_event_set *set);
16567 +static int pfm_intel_atom_stop_save(struct pfm_context *ctx,
16568 + struct pfm_event_set *set);
16569 +static int pfm_intel_atom_has_ovfls(struct pfm_context *ctx);
16570 +static void __kprobes pfm_intel_atom_quiesce(void);
16571 +
16572 +struct pfm_arch_pmu_info pfm_intel_atom_pmu_info = {
16573 + .stop_save = pfm_intel_atom_stop_save,
16574 + .has_ovfls = pfm_intel_atom_has_ovfls,
16575 + .quiesce = pfm_intel_atom_quiesce,
16576 + .restore_pmcs = pfm_intel_atom_restore_pmcs,
16577 + .acquire_pmu_percpu = pfm_intel_atom_acquire_pmu_percpu,
16578 + .release_pmu_percpu = pfm_intel_atom_release_pmu_percpu
16579 +
16580 +};
16581 +
16582 +#define PFM_ATOM_C(n) { \
16583 + .type = PFM_REG_I64, \
16584 + .desc = "PERFEVTSEL"#n, \
16585 + .dfl_val = PFM_ATOM_PMC_VAL, \
16586 + .rsvd_msk = PFM_ATOM_PMC_RSVD, \
16587 + .no_emul64_msk = PFM_ATOM_NO64, \
16588 + .hw_addr = MSR_P6_EVNTSEL0 + (n) \
16589 + }
16590 +
16591 +
16592 +static struct pfm_regmap_desc pfm_intel_atom_pmc_desc[] = {
16593 +/* pmc0 */ PFM_ATOM_C(0),
16594 +/* pmc1 */ PFM_ATOM_C(1),
16595 +/* pmc2 */ PMX_NA, PMX_NA,
16596 +/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16597 +/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16598 +/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16599 +/* pmc16 */ { .type = PFM_REG_I,
16600 + .desc = "FIXED_CTRL",
16601 + .dfl_val = 0x0000000000000888ULL, /* force PMI */
16602 + .rsvd_msk = 0xfffffffffffffcccULL, /* 3 fixed counters defined */
16603 + .no_emul64_msk = 0,
16604 + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
16605 + },
16606 +/* pmc17 */{ .type = PFM_REG_W,
16607 + .desc = "PEBS_ENABLE",
16608 + .dfl_val = 0,
16609 + .rsvd_msk = 0xfffffffffffffffeULL,
16610 + .no_emul64_msk = 0,
16611 + .hw_addr = MSR_IA32_PEBS_ENABLE
16612 + }
16613 +};
16614 +#define PFM_ATOM_MAX_PMCS ARRAY_SIZE(pfm_intel_atom_pmc_desc)
16615 +
16616 +#define PFM_ATOM_D(n) /* generic counter n; depends on pmc n */ \
16617 + { .type = PFM_REG_C, \
16618 + .desc = "PMC"#n, \
16619 + .rsvd_msk = PFM_ATOM_PMD_RSVD, \
16620 + .hw_addr = MSR_P6_PERFCTR0+(n), \
16621 + .dep_pmcs[0] = 1ULL << (n) \
16622 + }
16623 +
16624 +#define PFM_ATOM_FD(n) /* fixed counter n; depends on pmc16 */ \
16625 + { .type = PFM_REG_C, \
16626 + .desc = "FIXED_CTR"#n, \
16627 + .rsvd_msk = PFM_ATOM_PMD_RSVD, \
16628 + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+(n),\
16629 + .dep_pmcs[0] = 1ULL << 16 \
16630 + }
16631 +
16632 +static struct pfm_regmap_desc pfm_intel_atom_pmd_desc[] = {
16633 +/* pmd0 */ PFM_ATOM_D(0),
16634 +/* pmd1 */ PFM_ATOM_D(1),
16635 +/* pmd2 */ PMX_NA,
16636 +/* pmd3 */ PMX_NA,
16637 +/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16638 +/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16639 +/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
16640 +/* pmd16 */ PFM_ATOM_FD(0),
16641 +/* pmd17 */ PFM_ATOM_FD(1),
16642 +/* pmd18 */ PFM_ATOM_FD(2)
16643 +};
16644 +#define PFM_ATOM_MAX_PMDS ARRAY_SIZE(pfm_intel_atom_pmd_desc)
16645 +
16646 +static struct pfm_pmu_config pfm_intel_atom_pmu_conf;
16647 +
16648 +static int pfm_intel_atom_probe_pmu(void) /* 0: accepted, -1: rejected */
16649 +{
16650 + if (force) /* "force" module parameter skips the CPU identity checks */
16651 + goto doit;
16652 +
16653 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL)
16654 + return -1;
16655 +
16656 + if (current_cpu_data.x86 != 6)
16657 + return -1;
16658 +
16659 + if (current_cpu_data.x86_model != 28) /* only family 6, model 28 passes */
16660 + return -1;
16661 +doit:
16662 + /*
16663 + * having APIC is mandatory, so disregard force option
16664 + */
16665 + if (!cpu_has_apic) {
16666 + PFM_INFO("no Local APIC, try rebooting with lapic option");
16667 + return -1;
16668 + }
16669 +
16670 + PFM_INFO("detected Intel Atom PMU");
16671 +
16672 + if (force_nmi) /* "force_nmi" module parameter: flag the PMU as using NMI */
16673 + pfm_intel_atom_pmu_info.flags |= PFM_X86_FL_USE_NMI;
16674 +
16675 + return 0;
16676 +}
16677 +
16678 +/**
16679 + * pfm_intel_atom_has_ovfls - check for pending overflow condition
16680 + * @ctx: context to work on (not referenced by this implementation)
16681 + *
16682 + * detect if counters have overflowed.
16683 + * return:
16684 + * 0 : no overflow
16685 + * 1 : at least one overflow
16686 + */
16687 +static int __kprobes pfm_intel_atom_has_ovfls(struct pfm_context *ctx)
16688 +{
16689 + struct pfm_regmap_desc *d;
16690 + u64 ovf;
16691 +
16692 + d = pfm_pmu_conf->pmd_desc;
16693 + /*
16694 + * read global overflow status register
16695 + * if sharing PMU, then not all bits are ours so must
16696 + * check only the ones we actually use
16697 + */
16698 + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf);
16699 +
16700 + /*
16701 + * for pmd0, we also check PEBS overflow on bit 62
16702 + */
16703 + if ((d[0].type & PFM_REG_I) && (ovf & ((1ull << 62) | 1ull)))
16704 + return 1;
16705 +
16706 + if ((d[1].type & PFM_REG_I) && (ovf & 2ull)) /* pmd1: bit 1 */
16707 + return 1;
16708 +
16709 + if ((d[16].type & PFM_REG_I) && (ovf & (1ull << 32))) /* pmd16: bit 32 */
16710 + return 1;
16711 +
16712 + if ((d[17].type & PFM_REG_I) && (ovf & (2ull << 32))) /* pmd17: bit 33 */
16713 + return 1;
16714 +
16715 + if ((d[18].type & PFM_REG_I) && (ovf & (4ull << 32))) /* pmd18: bit 34 */
16716 + return 1;
16717 +
16718 + return 0;
16719 +}
16720 +
16721 +/**
16722 + * pfm_intel_atom_stop_save - stop monitoring, collect pending overflow, save pmds
16723 + * @ctx: context to work on
16724 + * @set: active set
16725 + *
16726 + * return:
16727 + * 1: caller needs to save pmds
16728 + * 0: caller does not need to save pmds, they have been saved by this call
16729 + */
16730 +static int pfm_intel_atom_stop_save(struct pfm_context *ctx,
16731 + struct pfm_event_set *set)
16732 +{
16733 +#define PFM_ATOM_WMASK (1ULL << 31) /* not referenced in this function */
16734 +#define PFM_ATOM_OMASK ((1ULL << 31)-1) /* low 31 bits taken from hardware */
16735 + u64 clear_ovf = 0;
16736 + u64 ovf, ovf2, val;
16737 +
16738 + /*
16739 + * read global overflow status register
16740 + * if sharing PMU, then not all bits are ours so must
16741 + * check only the ones we actually use.
16742 + *
16743 + * XXX: Atom seems to have a bug with the stickiness of
16744 + * GLOBAL_STATUS. If we read GLOBAL_STATUS after we
16745 + * clear the generic counters, then their bits in
16746 + * GLOBAL_STATUS are cleared. This should not be the
16747 + * case according to architected PMU. To workaround
16748 + * the problem, we read GLOBAL_STATUS BEFORE we stop
16749 + * all monitoring.
16750 + */
16751 + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf);
16752 +
16753 + /*
16754 + * stop monitoring
16755 + */
16756 + if (test_bit(0, cast_ulp(set->used_pmcs)))
16757 + wrmsrl(MSR_P6_EVNTSEL0, 0);
16758 +
16759 + if (test_bit(1, cast_ulp(set->used_pmcs)))
16760 + wrmsrl(MSR_P6_EVNTSEL1, 0);
16761 +
16762 + if (test_bit(16, cast_ulp(set->used_pmcs)))
16763 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
16764 +
16765 + if (test_bit(17, cast_ulp(set->used_pmcs)))
16766 + wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
16767 +
16768 + /*
16769 + * XXX: related to bug mentioned above
16770 + *
16771 + * read GLOBAL_STATUS again to avoid race condition
16772 + * with overflows happening after first read and
16773 + * before stop. That avoids missing overflows on
16774 + * the fixed counters and PEBS
16775 + */
16776 + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf2);
16777 + ovf |= ovf2;
16778 +
16779 + /*
16780 + * if we already have a pending overflow condition, we simply
16781 + * return to take care of it first.
16782 + */
16783 + if (set->npend_ovfls)
16784 + return 1;
16785 +
16786 + /*
16787 + * check PMD 0,1,16,17,18 for overflow and save their value
16788 + */
16789 + if (test_bit(0, cast_ulp(set->used_pmds))) {
16790 + rdmsrl(MSR_P6_PERFCTR0, val);
16791 + if (ovf & ((1ull<<62)|1ull)) { /* counter 0 or PEBS (bit 62) */
16792 + __set_bit(0, cast_ulp(set->povfl_pmds));
16793 + set->npend_ovfls++;
16794 + clear_ovf = (1ull << 62) | 1ull;
16795 + }
16796 + val = (set->pmds[0].value & ~PFM_ATOM_OMASK)
16797 + | (val & PFM_ATOM_OMASK);
16798 + set->pmds[0].value = val;
16799 + }
16800 +
16801 + if (test_bit(1, cast_ulp(set->used_pmds))) {
16802 + rdmsrl(MSR_P6_PERFCTR1, val);
16803 + if (ovf & 2ull) {
16804 + __set_bit(1, cast_ulp(set->povfl_pmds));
16805 + set->npend_ovfls++;
16806 + clear_ovf |= 2ull;
16807 + }
16808 + val = (set->pmds[1].value & ~PFM_ATOM_OMASK)
16809 + | (val & PFM_ATOM_OMASK);
16810 + set->pmds[1].value = val;
16811 + }
16812 +
16813 + if (test_bit(16, cast_ulp(set->used_pmds))) {
16814 + rdmsrl(MSR_CORE_PERF_FIXED_CTR0, val);
16815 + if (ovf & (1ull << 32)) {
16816 + __set_bit(16, cast_ulp(set->povfl_pmds));
16817 + set->npend_ovfls++;
16818 + clear_ovf |= 1ull << 32;
16819 + }
16820 + val = (set->pmds[16].value & ~PFM_ATOM_OMASK)
16821 + | (val & PFM_ATOM_OMASK);
16822 + set->pmds[16].value = val;
16823 + }
16824 +
16825 + if (test_bit(17, cast_ulp(set->used_pmds))) {
16826 + rdmsrl(MSR_CORE_PERF_FIXED_CTR0+1, val);
16827 + if (ovf & (2ull << 32)) {
16828 + __set_bit(17, cast_ulp(set->povfl_pmds));
16829 + set->npend_ovfls++;
16830 + clear_ovf |= 2ull << 32;
16831 + }
16832 + val = (set->pmds[17].value & ~PFM_ATOM_OMASK)
16833 + | (val & PFM_ATOM_OMASK);
16834 + set->pmds[17].value = val;
16835 + }
16836 +
16837 + if (test_bit(18, cast_ulp(set->used_pmds))) {
16838 + rdmsrl(MSR_CORE_PERF_FIXED_CTR0+2, val);
16839 + if (ovf & (4ull << 32)) {
16840 + __set_bit(18, cast_ulp(set->povfl_pmds));
16841 + set->npend_ovfls++;
16842 + clear_ovf |= 4ull << 32;
16843 + }
16844 + val = (set->pmds[18].value & ~PFM_ATOM_OMASK)
16845 + | (val & PFM_ATOM_OMASK);
16846 + set->pmds[18].value = val;
16847 + }
16848 +
16849 + if (clear_ovf) /* write back only the status bits recorded above */
16850 + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf);
16851 +
16852 + /* 0 means: no need to save PMDs at upper level */
16853 + return 0;
16854 +}
16855 +
16856 +/**
16857 + * pfm_intel_atom_quiesce - stop monitoring without grabbing any lock
16858 + *
16859 + * called from NMI interrupt handler to immediately stop monitoring
16860 + * cannot grab any lock, including perfmon related locks
16861 + */
16862 +static void __kprobes pfm_intel_atom_quiesce(void)
16863 +{
16864 + /*
16865 + * quiesce PMU by clearing available registers that have
16866 + * the start/stop capability
16867 + */
16868 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* pmc0 */
16869 + wrmsrl(MSR_P6_EVNTSEL0, 0);
16870 +
16871 + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* pmc1 */
16872 + wrmsrl(MSR_P6_EVNTSEL1, 0);
16873 +
16874 + if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* FIXED_CTRL */
16875 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
16876 +
16877 + if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* PEBS_ENABLE */
16878 + wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
16879 +}
16880 +
16881 +/**
16882 + * pfm_intel_atom_restore_pmcs - reload PMC registers
16883 + * @ctx: context to restore from
16884 + * @set: current event set
16885 + *
16886 + * restores pmcs and also PEBS Data Save area pointer
16887 + */
16888 +static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx,
16889 + struct pfm_event_set *set)
16890 +{
16891 + struct pfm_arch_context *ctx_arch;
16892 + u64 clear_ovf = 0; /* GLOBAL_OVF_CTRL bits to write after the reload */
16893 +
16894 + ctx_arch = pfm_ctx_arch(ctx);
16895 + /*
16896 + * must restore DS pointer before restoring PMCs
16897 + * as this can potentially reactivate monitoring
16898 + */
16899 + if (ctx_arch->flags.use_ds)
16900 + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
16901 +
16902 + if (test_bit(0, cast_ulp(set->used_pmcs))) {
16903 + wrmsrl(MSR_P6_EVNTSEL0, set->pmcs[0]);
16904 + clear_ovf = 1ull; /* bit 0: generic counter 0 */
16905 + }
16906 +
16907 + if (test_bit(1, cast_ulp(set->used_pmcs))) {
16908 + wrmsrl(MSR_P6_EVNTSEL1, set->pmcs[1]);
16909 + clear_ovf |= 2ull; /* bit 1: generic counter 1 */
16910 + }
16911 +
16912 + if (test_bit(16, cast_ulp(set->used_pmcs))) {
16913 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, set->pmcs[16]);
16914 + clear_ovf |= 7ull << 32; /* bits 32-34: the three fixed counters */
16915 + }
16916 +
16917 + if (test_bit(17, cast_ulp(set->used_pmcs))) {
16918 + wrmsrl(MSR_IA32_PEBS_ENABLE, set->pmcs[17]);
16919 + clear_ovf |= 1ull << 62; /* bit 62: PEBS overflow */
16920 + }
16921 +
16922 + if (clear_ovf)
16923 + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf);
16924 +}
16925 +
16926 +static int pfm_intel_atom_pmc17_check(struct pfm_context *ctx,
16927 + struct pfm_event_set *set, /* not referenced here */
16928 + struct pfarg_pmc *req) /* only req->reg_value is examined */
16929 +{
16930 + struct pfm_arch_context *ctx_arch;
16931 + ctx_arch = pfm_ctx_arch(ctx);
16932 +
16933 + /*
16934 + * if user activates PEBS_ENABLE, then we need to have a valid
16935 + * DS Area setup. This only happens when the PEBS sampling format is
16936 + * used in which case PFM_X86_USE_PEBS is set. We must reject all other
16937 + * requests.
16938 + *
16939 + * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears
16940 + * that a value of 0 for this MSR does crash the system with
16941 + * PEBS_ENABLE=1.
16942 + */
16943 + if (!ctx_arch->flags.use_pebs && req->reg_value) {
16944 + PFM_DBG("pmc17 useable only with a PEBS sampling format");
16945 + return -EINVAL; /* non-zero value requested without PEBS in use */
16946 + }
16947 + return 0; /* request accepted */
16948 +}
16949 +
16950 +DEFINE_PER_CPU(u64, saved_global_ctrl);
16951 +
16952 +/**
16953 + * pfm_intel_atom_acquire_pmu_percpu - acquire PMU resource per CPU
16954 + *
16955 + * For Atom, it is necessary to enable all available
16956 + * registers. The firmware rightfully has the fixed counters
16957 + * disabled for backward compatibility with architectural perfmon
16958 + * v1
16959 + *
16960 + * This function is invoked on each online CPU
16961 + */
16962 +static void pfm_intel_atom_acquire_pmu_percpu(void)
16963 +{
16964 + struct pfm_regmap_desc *d;
16965 + u64 mask = 0; /* value written to MSR_CORE_PERF_GLOBAL_CTRL below */
16966 + unsigned int i;
16967 +
16968 + /*
16969 + * build bitmask of registers that are available to
16970 + * us. In some cases, there may be fewer registers than
16971 + * what Atom supports due to sharing with other kernel
16972 + * subsystems, such as NMI
16973 + */
16974 + d = pfm_pmu_conf->pmd_desc;
16975 + for (i=0; i < 16; i++) {
16976 + if ((d[i].type & PFM_REG_I) == 0)
16977 + continue;
16978 + mask |= 1ull << i; /* pmd0-15 map to bits 0-15 */
16979 + }
16980 + for (i=16; i < PFM_ATOM_MAX_PMDS; i++) {
16981 + if ((d[i].type & PFM_REG_I) == 0)
16982 + continue;
16983 + mask |= 1ull << (32+i-16); /* pmd16 and up map to bits 32 and up */
16984 + }
16985 +
16986 + /*
16987 + * keep a local copy of the current MSR_CORE_PERF_GLOBAL_CTRL
16988 + */
16989 + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl));
16990 +
16991 + PFM_DBG("global=0x%llx set to 0x%llx",
16992 + __get_cpu_var(saved_global_ctrl),
16993 + mask);
16994 +
16995 + /*
16996 + * enable all registers
16997 + *
16998 + * No need to quiesce PMU. If there is an overflow, it will be
16999 + * treated as spurious by the handler
17000 + */
17001 + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, mask);
17002 +}
17003 +
17004 +/**
17005 + * pfm_intel_atom_release_pmu_percpu - release PMU resource per CPU
17006 + *
17007 + * For Atom, we restore MSR_CORE_PERF_GLOBAL_CTRL to its original value
17008 + */
17009 +static void pfm_intel_atom_release_pmu_percpu(void)
17010 +{
17011 + PFM_DBG("global_ctrl restored to 0x%llx",
17012 + __get_cpu_var(saved_global_ctrl));
17013 +
17014 + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl));
17015 +}
17016 +
17017 +static struct pfm_pmu_config pfm_intel_atom_pmu_conf = {
17018 + .pmu_name = "Intel Atom",
17019 + .pmd_desc = pfm_intel_atom_pmd_desc,
17020 + .counter_width = PFM_ATOM_PMD_WIDTH,
17021 + .num_pmc_entries = PFM_ATOM_MAX_PMCS,
17022 + .num_pmd_entries = PFM_ATOM_MAX_PMDS,
17023 + .pmc_desc = pfm_intel_atom_pmc_desc,
17024 + .probe_pmu = pfm_intel_atom_probe_pmu,
17025 + .version = "1.0",
17026 + .flags = PFM_PMU_BUILTIN_FLAG,
17027 + .owner = THIS_MODULE,
17028 + .pmc_write_check = pfm_intel_atom_pmc17_check,
17029 + .pmu_info = &pfm_intel_atom_pmu_info
17030 +};
17031 +
17032 +static int __init pfm_intel_atom_pmu_init_module(void)
17033 +{
17034 + return pfm_pmu_register(&pfm_intel_atom_pmu_conf);
17035 +}
17036 +
17037 +static void __exit pfm_intel_atom_pmu_cleanup_module(void)
17038 +{
17039 + pfm_pmu_unregister(&pfm_intel_atom_pmu_conf);
17040 +}
17041 +
17042 +module_init(pfm_intel_atom_pmu_init_module);
17043 +module_exit(pfm_intel_atom_pmu_cleanup_module);
17044 diff --git a/arch/x86/perfmon/perfmon_intel_core.c b/arch/x86/perfmon/perfmon_intel_core.c
17045 new file mode 100644
17046 index 0000000..fddc436
17047 --- /dev/null
17048 +++ b/arch/x86/perfmon/perfmon_intel_core.c
17049 @@ -0,0 +1,449 @@
17050 +/*
17051 + * This file contains the Intel Core PMU registers description tables.
17052 + * Intel Core-based processors support architectural perfmon v2 + PEBS
17053 + *
17054 + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
17055 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
17056 + */
17057 +#include <linux/module.h>
17058 +#include <linux/kprobes.h>
17059 +#include <linux/perfmon_kern.h>
17060 +#include <linux/nmi.h>
17061 +
17062 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
17063 +MODULE_DESCRIPTION("Intel Core");
17064 +MODULE_LICENSE("GPL");
17065 +
17066 +static int force_nmi;
17067 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
17068 +module_param(force_nmi, bool, 0600);
17069 +
17070 +/*
17071 + * - upper 32 bits are reserved
17072 + * - INT: APIC enable bit is reserved (forced to 1)
17073 + * - bit 21 is reserved
17074 + *
17075 + * RSVD: reserved bits must be 1
17076 + */
17077 +#define PFM_CORE_PMC_RSVD ((~((1ULL<<32)-1)) \
17078 + | (1ULL<<20) \
17079 + | (1ULL<<21))
17080 +
17081 +/*
17082 + * Core counters are 40-bits
17083 + */
17084 +#define PFM_CORE_CTR_RSVD (~((1ULL<<40)-1))
17085 +
17086 +/*
17087 + * force Local APIC interrupt on overflow
17088 + * disable with NO_EMUL64
17089 + */
17090 +#define PFM_CORE_PMC_VAL (1ULL<<20)
17091 +#define PFM_CORE_NO64 (1ULL<<20)
17092 +
17093 +#define PFM_CORE_NA { .reg_type = PFM_REGT_NA}
17094 +
17095 +#define PFM_CORE_CA(m, c, t) \
17096 + { \
17097 + .addrs[0] = m, \
17098 + .ctr = c, \
17099 + .reg_type = t \
17100 + }
17101 +
17102 +struct pfm_ds_area_intel_core {
17103 + u64 bts_buf_base;
17104 + u64 bts_index;
17105 + u64 bts_abs_max;
17106 + u64 bts_intr_thres;
17107 + u64 pebs_buf_base;
17108 + u64 pebs_index;
17109 + u64 pebs_abs_max;
17110 + u64 pebs_intr_thres;
17111 + u64 pebs_cnt_reset;
17112 +};
17113 +
17114 +static void pfm_core_restore_pmcs(struct pfm_context *ctx,
17115 + struct pfm_event_set *set);
17116 +static int pfm_core_has_ovfls(struct pfm_context *ctx);
17117 +static int pfm_core_stop_save(struct pfm_context *ctx,
17118 + struct pfm_event_set *set);
17119 +static void __kprobes pfm_core_quiesce(void);
17120 +
17121 +static u64 enable_mask[PFM_MAX_PMCS];
17122 +static u16 max_enable;
17123 +
17124 +struct pfm_arch_pmu_info pfm_core_pmu_info = {
17125 + .stop_save = pfm_core_stop_save,
17126 + .has_ovfls = pfm_core_has_ovfls,
17127 + .quiesce = pfm_core_quiesce,
17128 + .restore_pmcs = pfm_core_restore_pmcs
17129 +};
17130 +
17131 +static struct pfm_regmap_desc pfm_core_pmc_desc[] = {
17132 +/* pmc0 */ {
17133 + .type = PFM_REG_I64,
17134 + .desc = "PERFEVTSEL0",
17135 + .dfl_val = PFM_CORE_PMC_VAL,
17136 + .rsvd_msk = PFM_CORE_PMC_RSVD,
17137 + .no_emul64_msk = PFM_CORE_NO64,
17138 + .hw_addr = MSR_P6_EVNTSEL0
17139 + },
17140 +/* pmc1 */ {
17141 + .type = PFM_REG_I64,
17142 + .desc = "PERFEVTSEL1",
17143 + .dfl_val = PFM_CORE_PMC_VAL,
17144 + .rsvd_msk = PFM_CORE_PMC_RSVD,
17145 + .no_emul64_msk = PFM_CORE_NO64,
17146 + .hw_addr = MSR_P6_EVNTSEL1
17147 + },
17148 +/* pmc2 */ PMX_NA, PMX_NA,
17149 +/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17150 +/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17151 +/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17152 +/* pmc16 */ { .type = PFM_REG_I,
17153 + .desc = "FIXED_CTRL",
17154 + .dfl_val = 0x888ULL,
17155 + .rsvd_msk = 0xfffffffffffffcccULL,
17156 + .no_emul64_msk = 0,
17157 + .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
17158 + },
17159 +/* pmc17 */ { .type = PFM_REG_W,
17160 + .desc = "PEBS_ENABLE",
17161 + .dfl_val = 0,
17162 + .rsvd_msk = 0xfffffffffffffffeULL,
17163 + .no_emul64_msk = 0,
17164 + .hw_addr = MSR_IA32_PEBS_ENABLE
17165 + }
17166 +};
17167 +
17168 +#define PFM_CORE_D(n) /* generic counter n; depends on pmc n */ \
17169 + { .type = PFM_REG_C, \
17170 + .desc = "PMC"#n, \
17171 + .rsvd_msk = PFM_CORE_CTR_RSVD, \
17172 + .hw_addr = MSR_P6_PERFCTR0+(n), \
17173 + .dep_pmcs[0] = 1ULL << (n) \
17174 + }
17175 +
17176 +#define PFM_CORE_FD(n) /* fixed counter n; depends on pmc16 */ \
17177 + { .type = PFM_REG_C, \
17178 + .desc = "FIXED_CTR"#n, \
17179 + .rsvd_msk = PFM_CORE_CTR_RSVD, \
17180 + .hw_addr = MSR_CORE_PERF_FIXED_CTR0+(n),\
17181 + .dep_pmcs[0] = 1ULL << 16 \
17182 + }
17183 +
17184 +static struct pfm_regmap_desc pfm_core_pmd_desc[] = {
17185 +/* pmd0 */ PFM_CORE_D(0),
17186 +/* pmd1 */ PFM_CORE_D(1),
17187 +/* pmd2 */ PMX_NA, PMX_NA,
17188 +/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17189 +/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17190 +/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
17191 +/* pmd16 */ PFM_CORE_FD(0),
17192 +/* pmd17 */ PFM_CORE_FD(1),
17193 +/* pmd18 */ PFM_CORE_FD(2)
17194 +};
17195 +#define PFM_CORE_NUM_PMCS ARRAY_SIZE(pfm_core_pmc_desc)
17196 +#define PFM_CORE_NUM_PMDS ARRAY_SIZE(pfm_core_pmd_desc)
17197 +
17198 +static struct pfm_pmu_config pfm_core_pmu_conf;
17199 +
17200 +static int pfm_core_probe_pmu(void) /* 0: accepted, -1: rejected */
17201 +{
17202 + /*
17203 + * Check for Intel Core processor explicitly
17204 + * Checking for cpu_has_perfmon is not enough as this
17205 + * matches intel Core Duo/Core Solo but none supports
17206 + * PEBS.
17207 + *
17208 + * Intel Core = arch perfmon v2 + PEBS
17209 + */
17210 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
17211 + PFM_INFO("not an Intel processor");
17212 + return -1;
17213 + }
17214 +
17215 + if (current_cpu_data.x86 != 6)
17216 + return -1;
17217 +
17218 + switch (current_cpu_data.x86_model) {
17219 + case 15: /* Merom */
17220 + break;
17221 + case 23: /* Penryn */
17222 + break;
17223 + case 29: /* Dunnington */
17224 + break;
17225 + default:
17226 + return -1;
17227 + }
17228 +
17229 + if (!cpu_has_apic) {
17230 + PFM_INFO("no Local APIC, unsupported");
17231 + return -1;
17232 + }
17233 +
17234 + PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d",
17235 + nmi_watchdog, atomic_read(&nmi_active), force_nmi);
17236 +
17237 + /*
17238 + * Intel Core processors implement DS and PEBS, no need to check
17239 + */
17240 + if (cpu_has_pebs) /* informational only: the probe result is unaffected */
17241 + PFM_INFO("PEBS supported, enabled");
17242 +
17243 + /*
17244 + * initialize bitmask of register with enable capability, i.e.,
17245 + * startstop. This is used to restrict the number of registers to
17246 + * touch on start/stop
17247 + * max_enable: number of bits to scan in enable_mask = highest + 1
17248 + *
17249 + * may be adjusted in pfm_arch_pmu_acquire()
17250 + */
17251 + __set_bit(0, cast_ulp(enable_mask));
17252 + __set_bit(1, cast_ulp(enable_mask));
17253 + __set_bit(16, cast_ulp(enable_mask));
17254 + __set_bit(17, cast_ulp(enable_mask));
17255 + max_enable = 17+1;
17256 +
17257 + if (force_nmi) /* "force_nmi" module parameter: flag the PMU as using NMI */
17258 + pfm_core_pmu_info.flags |= PFM_X86_FL_USE_NMI;
17259 +
17260 + return 0;
17261 +}
17262 +
17263 +static int pfm_core_pmc17_check(struct pfm_context *ctx,
17264 + struct pfm_event_set *set, /* not referenced here */
17265 + struct pfarg_pmc *req) /* only req->reg_value is examined */
17266 +{
17267 + struct pfm_arch_context *ctx_arch;
17268 + ctx_arch = pfm_ctx_arch(ctx);
17269 +
17270 + /*
17271 + * if user activates PEBS_ENABLE, then we need to have a valid
17272 + * DS Area setup. This only happens when the PEBS sampling format is
17273 + * used in which case PFM_X86_USE_PEBS is set. We must reject all other
17274 + * requests.
17275 + *
17276 + * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears
17277 + * that a value of 0 for this MSR does crash the system with
17278 + * PEBS_ENABLE=1.
17279 + */
17280 + if (!ctx_arch->flags.use_pebs && req->reg_value) {
17281 + PFM_DBG("pmc17 useable only with a PEBS sampling format");
17282 + return -EINVAL; /* non-zero value requested without PEBS in use */
17283 + }
17284 + return 0; /* request accepted */
17285 +}
17286 +
17287 +/*
17288 + * detect if counters have overflowed.
17289 + * return:
17290 + * 0 : no overflow
17291 + * 1 : at least one overflow
17292 + *
17293 + * used by Intel Core-based processors
17294 + */
17295 +static int __kprobes pfm_core_has_ovfls(struct pfm_context *ctx)
17296 +{
17297 + u64 *cnt_mask;
17298 + u64 wmask, val;
17299 + u16 i, num;
17300 +
17301 + cnt_mask = ctx->regs.cnt_pmds;
17302 + num = ctx->regs.num_counters;
17303 + /* bit "counter_width" of a counter: clear is reported as an overflow */
17304 + wmask = 1ULL << pfm_pmu_conf->counter_width;
17305 +
17306 + /* visit each counting PMD of the context; num is how many remain */
17307 + for (i = 0; num; i++) {
17308 + if (test_bit(i, cast_ulp(cnt_mask))) {
17309 + rdmsrl(pfm_core_pmd_desc[i].hw_addr, val);
17310 + if (!(val & wmask))
17311 + return 1;
17312 + num--;
17313 + }
17314 + }
17315 + return 0;
17316 +}
17317 +
17318 +static int pfm_core_stop_save(struct pfm_context *ctx,
17319 + struct pfm_event_set *set)
17320 +{
17321 + struct pfm_arch_context *ctx_arch;
17322 + struct pfm_ds_area_intel_core *ds = NULL;
17323 + u64 used_mask[PFM_PMC_BV];
17324 + u64 *cnt_mask;
17325 + u64 val, wmask, ovfl_mask;
17326 + u16 count, has_ovfl;
17327 + u16 i, pebs_idx = ~0; /* stays ~0 unless the context uses PEBS */
17328 +
17329 + ctx_arch = pfm_ctx_arch(ctx);
17330 +
17331 + wmask = 1ULL << pfm_pmu_conf->counter_width;
17332 +
17333 + /*
17334 + * used enable pmc bitmask
17335 + */
17336 + bitmap_and(cast_ulp(used_mask),
17337 + cast_ulp(set->used_pmcs),
17338 + cast_ulp(enable_mask),
17339 + max_enable);
17340 +
17341 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
17342 + /*
17343 + * stop monitoring
17344 + * Unfortunately, this is very expensive!
17345 + * wrmsrl() is serializing.
17346 + */
17347 + for (i = 0; count; i++) {
17348 + if (test_bit(i, cast_ulp(used_mask))) {
17349 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
17350 + count--;
17351 + }
17352 + }
17353 + /*
17354 + * if we already have a pending overflow condition, we simply
17355 + * return to take care of this first.
17356 + */
17357 + if (set->npend_ovfls)
17358 + return 1; /* 1 means: PMDs were not saved by this call */
17359 +
17360 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
17361 + cnt_mask = ctx->regs.cnt_pmds;
17362 +
17363 + if (ctx_arch->flags.use_pebs) {
17364 + ds = ctx_arch->ds_area;
17365 + pebs_idx = 0; /* PMC0/PMD0 */
17366 + PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx",
17367 + ds,
17368 + (unsigned long long)ds->pebs_index,
17369 + (unsigned long long)ds->pebs_intr_thres);
17370 + }
17371 +
17372 + /*
17373 + * Check for pending overflows and save PMDs (combo)
17374 + * We employ used_pmds and not intr_pmds because we must
17375 + * also saved on PMD registers.
17376 + * Must check for counting PMDs because of virtual PMDs
17377 + *
17378 + * XXX: should use the ovf_status register instead, yet
17379 + * we would have to check if NMI is used and fallback
17380 + * to individual pmd inspection.
17381 + */
17382 + count = set->nused_pmds; /* used PMDs still to visit */
17383 +
17384 + for (i = 0; count; i++) {
17385 + if (test_bit(i, cast_ulp(set->used_pmds))) {
17386 + val = pfm_arch_read_pmd(ctx, i);
17387 + if (likely(test_bit(i, cast_ulp(cnt_mask)))) {
17388 + if (i == pebs_idx) /* PEBS: index reached threshold */
17389 + has_ovfl = (ds->pebs_index >=
17390 + ds->pebs_intr_thres);
17391 + else /* plain counter: width bit is clear */
17392 + has_ovfl = !(val & wmask);
17393 + if (has_ovfl) {
17394 + __set_bit(i, cast_ulp(set->povfl_pmds));
17395 + set->npend_ovfls++;
17396 + }
17397 + val = (set->pmds[i].value & ~ovfl_mask)
17398 + | (val & ovfl_mask);
17399 + }
17400 + set->pmds[i].value = val;
17401 + count--;
17402 + }
17403 + }
17404 + /* 0 means: no need to save PMDs at upper level */
17405 + return 0;
17406 +}
17407 +
17408 +/**
17409 + * pfm_core_quiesce - stop monitoring without grabbing any lock
17410 + *
17411 + * called from NMI interrupt handler to immediately stop monitoring
17412 + * cannot grab any lock, including perfmon related locks
17413 + */
17414 +static void __kprobes pfm_core_quiesce(void)
17415 +{
17416 + /*
17417 + * quiesce PMU by clearing available registers that have
17418 + * the start/stop capability
17419 + */
17420 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* pmc0 */
17421 + wrmsrl(MSR_P6_EVNTSEL0, 0);
17422 + if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* pmc1 */
17423 + wrmsrl(MSR_P6_EVNTSEL1, 0);
17424 + if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* FIXED_CTRL */
17425 + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
17426 + if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) /* PEBS_ENABLE */
17427 + wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
17428 +}
17429 +/**
17430 + * pfm_core_restore_pmcs - reload PMC registers
17431 + * @ctx: context to restore from
17432 + * @set: current event set
17433 + *
17434 + * optimized version of pfm_arch_restore_pmcs(). On Core, we can
17435 + * afford to only restore the pmcs registers we use, because they are
17436 + * all independent from each other.
17437 + */
17438 +static void pfm_core_restore_pmcs(struct pfm_context *ctx,
17439 + struct pfm_event_set *set)
17440 +{
17441 + struct pfm_arch_context *ctx_arch;
17442 + u64 *mask;
17443 + u16 i, num;
17444 +
17445 + ctx_arch = pfm_ctx_arch(ctx);
17446 +
17447 + /*
17448 + * must restore DS pointer before restoring PMCs
17449 + * as this can potentially reactivate monitoring
17450 + */
17451 + if (ctx_arch->flags.use_ds)
17452 + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
17453 +
17454 + mask = set->used_pmcs;
17455 + num = set->nused_pmcs; /* used PMCs still to reload */
17456 + for (i = 0; num; i++) { /* ends once every used PMC has been written */
17457 + if (test_bit(i, cast_ulp(mask))) {
17458 + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]);
17459 + num--;
17460 + }
17461 + }
17462 +}
17463 +
17464 +/*
17465 + * Counters may have model-specific width which can be probed using
17466 + * the CPUID.0xa leaf. Yet, the documentation says: "
17467 + * In the initial implementation, only the read bit width is reported
17468 + * by CPUID, write operations are limited to the low 32 bits.
17469 + * Bits [w-32] are sign extensions of bit 31. As such the effective width
17470 + * of a counter is 31 bits only.
17471 + */
17472 +static struct pfm_pmu_config pfm_core_pmu_conf = {
17473 + .pmu_name = "Intel Core",
17474 + .pmd_desc = pfm_core_pmd_desc,
17475 + .counter_width = 31,
17476 + .num_pmc_entries = PFM_CORE_NUM_PMCS,
17477 + .num_pmd_entries = PFM_CORE_NUM_PMDS,
17478 + .pmc_desc = pfm_core_pmc_desc,
17479 + .probe_pmu = pfm_core_probe_pmu,
17480 + .version = "1.2",
17481 + .flags = PFM_PMU_BUILTIN_FLAG,
17482 + .owner = THIS_MODULE,
17483 + .pmu_info = &pfm_core_pmu_info,
17484 + .pmc_write_check = pfm_core_pmc17_check
17485 +};
17486 +
17487 +static int __init pfm_core_pmu_init_module(void)
17488 +{
17489 + return pfm_pmu_register(&pfm_core_pmu_conf);
17490 +}
17491 +
17492 +static void __exit pfm_core_pmu_cleanup_module(void)
17493 +{
17494 + pfm_pmu_unregister(&pfm_core_pmu_conf);
17495 +}
17496 +
17497 +module_init(pfm_core_pmu_init_module);
17498 +module_exit(pfm_core_pmu_cleanup_module);
17499 diff --git a/arch/x86/perfmon/perfmon_p4.c b/arch/x86/perfmon/perfmon_p4.c
17500 new file mode 100644
17501 index 0000000..1ffcf3c
17502 --- /dev/null
17503 +++ b/arch/x86/perfmon/perfmon_p4.c
17504 @@ -0,0 +1,913 @@
17505 +/*
17506 + * This file contains the P4/Xeon PMU register description tables
17507 + * for both 32 and 64 bit modes.
17508 + *
17509 + * Copyright (c) 2005 Intel Corporation
17510 + * Contributed by Bryan Wilkerson <bryan.p.wilkerson@intel.com>
17511 + *
17512 + * This program is free software; you can redistribute it and/or
17513 + * modify it under the terms of version 2 of the GNU General Public
17514 + * License as published by the Free Software Foundation.
17515 + *
17516 + * This program is distributed in the hope that it will be useful,
17517 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
17518 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17519 + * General Public License for more details.
17520 + *
17521 + * You should have received a copy of the GNU General Public License
17522 + * along with this program; if not, write to the Free Software
17523 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17524 + * 02111-1307 USA
17525 + */
17526 +#include <linux/module.h>
17527 +#include <linux/perfmon_kern.h>
17528 +#include <linux/kprobes.h>
17529 +#include <linux/nmi.h>
17530 +#include <asm/msr.h>
17531 +#include <asm/apic.h>
17532 +
17533 +MODULE_AUTHOR("Bryan Wilkerson <bryan.p.wilkerson@intel.com>");
17534 +MODULE_DESCRIPTION("P4/Xeon/EM64T PMU description table");
17535 +MODULE_LICENSE("GPL");
17536 +
17537 +static int force;
17538 +MODULE_PARM_DESC(force, "bool: force module to load succesfully");
17539 +module_param(force, bool, 0600);
17540 +
17541 +static int force_nmi;
17542 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
17543 +module_param(force_nmi, bool, 0600);
17544 +
17545 +/*
17546 + * Extended per-register information, in addition to the address, used
17547 + * at runtime to figure out the mapping of reg addresses to logical procs
17548 + * and the association of registers to hardware specific features
17549 + */
17550 +struct pfm_p4_regmap {
17551 + /*
17552 + * MSR addresses, one per logical CPU. Index 0 corresponds to T0 and
17553 + * index 1 corresponds to T1. Entry 1 can be zero if no T1
17554 + * complement reg exists.
17555 + */
17556 + unsigned long addrs[2]; /* 2 = number of threads */
17557 + unsigned int ctr; /* for CCCR/PERFEVTSEL, index of associated counter in pmd_addrs[] */
17558 + unsigned int reg_type; /* bitmask of PFM_REGT_* flags */
17559 +};
17560 +
17561 +/*
17562 + * bitmask for pfm_p4_regmap.reg_type
17563 + */
17564 +#define PFM_REGT_NA 0x0000 /* not available */
17565 +#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */
17566 +#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */
17567 +#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */
17568 +#define PFM_REGT_PEBS 0x0010 /* PEBS related */
17569 +#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */
17570 +#define PFM_REGT_CTR 0x0040 /* counter */
17571 +
17572 +/*
17573 + * P4 specific context extension.
17574 + * allocated by pfm_p4_create_context(), stored in pfm_ctx_arch(ctx)->data
17575 + */
17576 +struct pfm_arch_p4_context {
17577 + u32 npend_ovfls; /* P4 NMI #pending ovfls */
17578 + u32 reserved;
17579 + u64 povfl_pmds[PFM_PMD_BV]; /* P4 NMI overflowed counters */
17580 + u64 saved_cccrs[PFM_MAX_PMCS];
17581 +};
17582 +
17583 +/*
17584 + * ESCR reserved bitmask:
17585 + * - bits 31 - 63 reserved
17586 + * - T1_OS and T1_USR bits (0-1) are reserved - set depending on logical proc;
17587 + * user mode application should use T0_OS and T0_USR (bits 2-3) instead
17588 + * RSVD: a bit set to 1 in the mask is reserved
17589 + */
17590 +#define PFM_ESCR_RSVD ~0x000000007ffffffcULL
17591 +
17592 +/*
17593 + * CCCR default value:
17594 + * - OVF_PMI_T0=1 (bit 26)
17595 + * - OVF_PMI_T1=0 (bit 27) (set if necessary in __pfm_write_reg_p4())
17596 + * - bits 16-17 set to 1, all other bits are zero
17597 + * The expansion is parenthesized so it is safe inside larger expressions.
17598 + * OVF_PMI is forced to zero if PFM_REGFL_NO_EMUL64 is set on CCCR
17599 + */
17600 +#define PFM_CCCR_DFL ((1ULL<<26) | (3ULL<<16))
17601 +
17602 +/*
17603 + * CCCR reserved fields:
17604 + * - bits 0-11, 16-17, 25-29, 31-63
17605 + * - OVF_PMI (26-27), override with REGFL_NO_EMUL64
17606 + *
17607 + * RSVD: a bit set to 1 in the mask is reserved
17608 + */
17609 +#define PFM_CCCR_RSVD ~((0xfull<<12) \
17610 + | (0x7full<<18) \
17611 + | (0x1ull<<30))
17612 +
17613 +#define PFM_P4_NO64 (3ULL<<26) /* use 3 even in non HT mode */
17614 +
17615 +#define PEBS_PMD 8 /* thread0: IQ_CTR4, thread1: IQ_CTR5 */
17616 +
17617 +/*
17618 + * With HyperThreading enabled:
17619 + *
17620 + * The ESCRs and CCCRs are divided in half with the top half
17621 + * belonging to logical processor 0 and the bottom half going to
17622 + * logical processor 1. Thus only half of the PMU resources are
17623 + * accessible to applications.
17624 + *
17625 + * PEBS is not available due to the fact that:
17626 + * - MSR_PEBS_MATRIX_VERT is shared between the threads
17627 + * - IA32_PEBS_ENABLE is shared between the threads
17628 + *
17629 + * With HyperThreading disabled:
17630 + *
17631 + * The full set of PMU resources is exposed to applications.
17632 + *
17633 + * The mapping is chosen such that PMCxx -> MSR is the same
17634 + * in HT and non HT mode, if register is present in HT mode.
17635 + *
17636 + */
17637 +#define PFM_REGT_NHTESCR (PFM_REGT_ESCR|PFM_REGT_NOHT)
17638 +#define PFM_REGT_NHTCCCR (PFM_REGT_CCCR|PFM_REGT_NOHT|PFM_REGT_EN)
17639 +#define PFM_REGT_NHTPEBS (PFM_REGT_PEBS|PFM_REGT_NOHT|PFM_REGT_EN)
17640 +#define PFM_REGT_NHTCTR (PFM_REGT_CTR|PFM_REGT_NOHT)
17641 +#define PFM_REGT_ENAC (PFM_REGT_CCCR|PFM_REGT_EN)
17642 +
17643 +static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value);
17644 +static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value);
17645 +static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum);
17646 +static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum);
17647 +static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags);
17648 +static void pfm_p4_free_context(struct pfm_context *ctx);
17649 +static int pfm_p4_has_ovfls(struct pfm_context *ctx);
17650 +static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set);
17651 +static void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
17652 +static void pfm_p4_nmi_copy_state(struct pfm_context *ctx);
17653 +static void __kprobes pfm_p4_quiesce(void);
17654 +
17655 +static u64 enable_mask[PFM_MAX_PMCS];
17656 +static u16 max_enable;
17657 +
17658 +static struct pfm_p4_regmap pmc_addrs[PFM_MAX_PMCS] = {
17659 + /*pmc 0 */ {{MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1}, 0, PFM_REGT_ESCR}, /* BPU_ESCR0,1 */
17660 + /*pmc 1 */ {{MSR_P4_IS_ESCR0, MSR_P4_IS_ESCR1}, 0, PFM_REGT_ESCR}, /* IS_ESCR0,1 */
17661 + /*pmc 2 */ {{MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1}, 0, PFM_REGT_ESCR}, /* MOB_ESCR0,1 */
17662 + /*pmc 3 */ {{MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1}, 0, PFM_REGT_ESCR}, /* ITLB_ESCR0,1 */
17663 + /*pmc 4 */ {{MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1}, 0, PFM_REGT_ESCR}, /* PMH_ESCR0,1 */
17664 + /*pmc 5 */ {{MSR_P4_IX_ESCR0, MSR_P4_IX_ESCR1}, 0, PFM_REGT_ESCR}, /* IX_ESCR0,1 */
17665 + /*pmc 6 */ {{MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1}, 0, PFM_REGT_ESCR}, /* FSB_ESCR0,1 */
17666 + /*pmc 7 */ {{MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1}, 0, PFM_REGT_ESCR}, /* BSU_ESCR0,1 */
17667 + /*pmc 8 */ {{MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1}, 0, PFM_REGT_ESCR}, /* MS_ESCR0,1 */
17668 + /*pmc 9 */ {{MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1}, 0, PFM_REGT_ESCR}, /* TC_ESCR0,1 */
17669 + /*pmc 10*/ {{MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1}, 0, PFM_REGT_ESCR}, /* TBPU_ESCR0,1 */
17670 + /*pmc 11*/ {{MSR_P4_FLAME_ESCR0, MSR_P4_FLAME_ESCR1}, 0, PFM_REGT_ESCR}, /* FLAME_ESCR0,1 */
17671 + /*pmc 12*/ {{MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1}, 0, PFM_REGT_ESCR}, /* FIRM_ESCR0,1 */
17672 + /*pmc 13*/ {{MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1}, 0, PFM_REGT_ESCR}, /* SAAT_ESCR0,1 */
17673 + /*pmc 14*/ {{MSR_P4_U2L_ESCR0, MSR_P4_U2L_ESCR1}, 0, PFM_REGT_ESCR}, /* U2L_ESCR0,1 */
17674 + /*pmc 15*/ {{MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1}, 0, PFM_REGT_ESCR}, /* DAC_ESCR0,1 */
17675 + /*pmc 16*/ {{MSR_P4_IQ_ESCR0, MSR_P4_IQ_ESCR1}, 0, PFM_REGT_ESCR}, /* IQ_ESCR0,1 (only model 1 and 2) */
17676 + /*pmc 17*/ {{MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1}, 0, PFM_REGT_ESCR}, /* ALF_ESCR0,1 */
17677 + /*pmc 18*/ {{MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1}, 0, PFM_REGT_ESCR}, /* RAT_ESCR0,1 */
17678 + /*pmc 19*/ {{MSR_P4_SSU_ESCR0, 0}, 0, PFM_REGT_ESCR}, /* SSU_ESCR0 */
17679 + /*pmc 20*/ {{MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1}, 0, PFM_REGT_ESCR}, /* CRU_ESCR0,1 */
17680 + /*pmc 21*/ {{MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3}, 0, PFM_REGT_ESCR}, /* CRU_ESCR2,3 */
17681 + /*pmc 22*/ {{MSR_P4_CRU_ESCR4, MSR_P4_CRU_ESCR5}, 0, PFM_REGT_ESCR}, /* CRU_ESCR4,5 */
17682 + /* CCCRs: the ctr field is the index in pmd_addrs[] of the controlled counter */
17683 + /*pmc 23*/ {{MSR_P4_BPU_CCCR0, MSR_P4_BPU_CCCR2}, 0, PFM_REGT_ENAC}, /* BPU_CCCR0,2 */
17684 + /*pmc 24*/ {{MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3}, 1, PFM_REGT_ENAC}, /* BPU_CCCR1,3 */
17685 + /*pmc 25*/ {{MSR_P4_MS_CCCR0, MSR_P4_MS_CCCR2}, 2, PFM_REGT_ENAC}, /* MS_CCCR0,2 */
17686 + /*pmc 26*/ {{MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3}, 3, PFM_REGT_ENAC}, /* MS_CCCR1,3 */
17687 + /*pmc 27*/ {{MSR_P4_FLAME_CCCR0, MSR_P4_FLAME_CCCR2}, 4, PFM_REGT_ENAC}, /* FLAME_CCCR0,2 */
17688 + /*pmc 28*/ {{MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3}, 5, PFM_REGT_ENAC}, /* FLAME_CCCR1,3 */
17689 + /*pmc 29*/ {{MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR2}, 6, PFM_REGT_ENAC}, /* IQ_CCCR0,2 */
17690 + /*pmc 30*/ {{MSR_P4_IQ_CCCR1, MSR_P4_IQ_CCCR3}, 7, PFM_REGT_ENAC}, /* IQ_CCCR1,3 */
17691 + /*pmc 31*/ {{MSR_P4_IQ_CCCR4, MSR_P4_IQ_CCCR5}, 8, PFM_REGT_ENAC}, /* IQ_CCCR4,5 */
17692 + /* non HT extensions: flagged PFM_REGT_NOHT, disabled by pfm_p4_probe_pmu() when HT is on */
17693 + /*pmc 32*/ {{MSR_P4_BPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BPU_ESCR1 */
17694 + /*pmc 33*/ {{MSR_P4_IS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IS_ESCR1 */
17695 + /*pmc 34*/ {{MSR_P4_MOB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MOB_ESCR1 */
17696 + /*pmc 35*/ {{MSR_P4_ITLB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ITLB_ESCR1 */
17697 + /*pmc 36*/ {{MSR_P4_PMH_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* PMH_ESCR1 */
17698 + /*pmc 37*/ {{MSR_P4_IX_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IX_ESCR1 */
17699 + /*pmc 38*/ {{MSR_P4_FSB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FSB_ESCR1 */
17700 + /*pmc 39*/ {{MSR_P4_BSU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BSU_ESCR1 */
17701 + /*pmc 40*/ {{MSR_P4_MS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MS_ESCR1 */
17702 + /*pmc 41*/ {{MSR_P4_TC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TC_ESCR1 */
17703 + /*pmc 42*/ {{MSR_P4_TBPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TBPU_ESCR1 */
17704 + /*pmc 43*/ {{MSR_P4_FLAME_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FLAME_ESCR1 */
17705 + /*pmc 44*/ {{MSR_P4_FIRM_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FIRM_ESCR1 */
17706 + /*pmc 45*/ {{MSR_P4_SAAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* SAAT_ESCR1 */
17707 + /*pmc 46*/ {{MSR_P4_U2L_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* U2L_ESCR1 */
17708 + /*pmc 47*/ {{MSR_P4_DAC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* DAC_ESCR1 */
17709 + /*pmc 48*/ {{MSR_P4_IQ_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IQ_ESCR1 (only model 1 and 2) */
17710 + /*pmc 49*/ {{MSR_P4_ALF_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ALF_ESCR1 */
17711 + /*pmc 50*/ {{MSR_P4_RAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* RAT_ESCR1 */
17712 + /*pmc 51*/ {{MSR_P4_CRU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR1 */
17713 + /*pmc 52*/ {{MSR_P4_CRU_ESCR3, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR3 */
17714 + /*pmc 53*/ {{MSR_P4_CRU_ESCR5, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR5 */
17715 + /*pmc 54*/ {{MSR_P4_BPU_CCCR2, 0}, 9, PFM_REGT_NHTCCCR}, /* BPU_CCCR2 */
17716 + /*pmc 55*/ {{MSR_P4_BPU_CCCR3, 0}, 10, PFM_REGT_NHTCCCR}, /* BPU_CCCR3 */
17717 + /*pmc 56*/ {{MSR_P4_MS_CCCR2, 0}, 11, PFM_REGT_NHTCCCR}, /* MS_CCCR2 */
17718 + /*pmc 57*/ {{MSR_P4_MS_CCCR3, 0}, 12, PFM_REGT_NHTCCCR}, /* MS_CCCR3 */
17719 + /*pmc 58*/ {{MSR_P4_FLAME_CCCR2, 0}, 13, PFM_REGT_NHTCCCR}, /* FLAME_CCCR2 */
17720 + /*pmc 59*/ {{MSR_P4_FLAME_CCCR3, 0}, 14, PFM_REGT_NHTCCCR}, /* FLAME_CCCR3 */
17721 + /*pmc 60*/ {{MSR_P4_IQ_CCCR2, 0}, 15, PFM_REGT_NHTCCCR}, /* IQ_CCCR2 */
17722 + /*pmc 61*/ {{MSR_P4_IQ_CCCR3, 0}, 16, PFM_REGT_NHTCCCR}, /* IQ_CCCR3 */
17723 + /*pmc 62*/ {{MSR_P4_IQ_CCCR5, 0}, 17, PFM_REGT_NHTCCCR}, /* IQ_CCCR5 */
17724 + /*pmc 63*/ {{0x3f2, 0}, 0, PFM_REGT_NHTPEBS},/* PEBS_MATRIX_VERT */
17725 + /*pmc 64*/ {{0x3f1, 0}, 0, PFM_REGT_NHTPEBS} /* PEBS_ENABLE */
17726 +};
17727 +
17728 +static struct pfm_p4_regmap pmd_addrs[PFM_MAX_PMDS] = {
17729 + /*pmd 0 */ {{MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_PERFCTR2}, 0, PFM_REGT_CTR}, /* BPU_CTR0,2 */
17730 + /*pmd 1 */ {{MSR_P4_BPU_PERFCTR1, MSR_P4_BPU_PERFCTR3}, 0, PFM_REGT_CTR}, /* BPU_CTR1,3 */
17731 + /*pmd 2 */ {{MSR_P4_MS_PERFCTR0, MSR_P4_MS_PERFCTR2}, 0, PFM_REGT_CTR}, /* MS_CTR0,2 */
17732 + /*pmd 3 */ {{MSR_P4_MS_PERFCTR1, MSR_P4_MS_PERFCTR3}, 0, PFM_REGT_CTR}, /* MS_CTR1,3 */
17733 + /*pmd 4 */ {{MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_PERFCTR2}, 0, PFM_REGT_CTR}, /* FLAME_CTR0,2 */
17734 + /*pmd 5 */ {{MSR_P4_FLAME_PERFCTR1, MSR_P4_FLAME_PERFCTR3}, 0, PFM_REGT_CTR}, /* FLAME_CTR1,3 */
17735 + /*pmd 6 */ {{MSR_P4_IQ_PERFCTR0, MSR_P4_IQ_PERFCTR2}, 0, PFM_REGT_CTR}, /* IQ_CTR0,2 */
17736 + /*pmd 7 */ {{MSR_P4_IQ_PERFCTR1, MSR_P4_IQ_PERFCTR3}, 0, PFM_REGT_CTR}, /* IQ_CTR1,3 */
17737 + /*pmd 8 */ {{MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_PERFCTR5}, 0, PFM_REGT_CTR}, /* IQ_CTR4,5 */
17738 + /*
17739 + * non HT extensions
17740 + */
17741 + /*pmd 9 */ {{MSR_P4_BPU_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR2 */
17742 + /*pmd 10*/ {{MSR_P4_BPU_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR3 */
17743 + /*pmd 11*/ {{MSR_P4_MS_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR2 */
17744 + /*pmd 12*/ {{MSR_P4_MS_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR3 */
17745 + /*pmd 13*/ {{MSR_P4_FLAME_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR2 */
17746 + /*pmd 14*/ {{MSR_P4_FLAME_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR3 */
17747 + /*pmd 15*/ {{MSR_P4_IQ_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR2 */
17748 + /*pmd 16*/ {{MSR_P4_IQ_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR3 */
17749 + /*pmd 17*/ {{MSR_P4_IQ_PERFCTR5, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR5 */
17750 +};
17751 +
17752 +static struct pfm_arch_pmu_info pfm_p4_pmu_info = {
17753 + .write_pmc = pfm_p4_write_pmc,
17754 + .write_pmd = pfm_p4_write_pmd,
17755 + .read_pmc = pfm_p4_read_pmc,
17756 + .read_pmd = pfm_p4_read_pmd,
17757 + .create_context = pfm_p4_create_context,
17758 + .free_context = pfm_p4_free_context,
17759 + .has_ovfls = pfm_p4_has_ovfls,
17760 + .stop_save = pfm_p4_stop_save,
17761 + .restore_pmcs = pfm_p4_restore_pmcs,
17762 + .nmi_copy_state = pfm_p4_nmi_copy_state,
17763 + .quiesce = pfm_p4_quiesce
17764 +};
17765 +
17766 +static struct pfm_regmap_desc pfm_p4_pmc_desc[] = {
17767 +/* pmc0 */ PMC_D(PFM_REG_I, "BPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR0),
17768 +/* pmc1 */ PMC_D(PFM_REG_I, "IS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR0),
17769 +/* pmc2 */ PMC_D(PFM_REG_I, "MOB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR0),
17770 +/* pmc3 */ PMC_D(PFM_REG_I, "ITLB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR0),
17771 +/* pmc4 */ PMC_D(PFM_REG_I, "PMH_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR0),
17772 +/* pmc5 */ PMC_D(PFM_REG_I, "IX_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR0),
17773 +/* pmc6 */ PMC_D(PFM_REG_I, "FSB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR0),
17774 +/* pmc7 */ PMC_D(PFM_REG_I, "BSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR0),
17775 +/* pmc8 */ PMC_D(PFM_REG_I, "MS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR0),
17776 +/* pmc9 */ PMC_D(PFM_REG_I, "TC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR0),
17777 +/* pmc10 */ PMC_D(PFM_REG_I, "TBPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR0),
17778 +/* pmc11 */ PMC_D(PFM_REG_I, "FLAME_ESCR0", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR0),
17779 +/* pmc12 */ PMC_D(PFM_REG_I, "FIRM_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR0),
17780 +/* pmc13 */ PMC_D(PFM_REG_I, "SAAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR0),
17781 +/* pmc14 */ PMC_D(PFM_REG_I, "U2L_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR0),
17782 +/* pmc15 */ PMC_D(PFM_REG_I, "DAC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR0),
17783 +/* pmc16 */ PMC_D(PFM_REG_I, "IQ_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), /* only model 1 and 2*/
17784 +/* pmc17 */ PMC_D(PFM_REG_I, "ALF_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR0),
17785 +/* pmc18 */ PMC_D(PFM_REG_I, "RAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR0),
17786 +/* pmc19 */ PMC_D(PFM_REG_I, "SSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SSU_ESCR0),
17787 +/* pmc20 */ PMC_D(PFM_REG_I, "CRU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR0),
17788 +/* pmc21 */ PMC_D(PFM_REG_I, "CRU_ESCR2" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR2),
17789 +/* pmc22 */ PMC_D(PFM_REG_I, "CRU_ESCR4" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR4),
17790 +/* pmc23 */ PMC_D(PFM_REG_I64, "BPU_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR0),
17791 +/* pmc24 */ PMC_D(PFM_REG_I64, "BPU_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR1),
17792 +/* pmc25 */ PMC_D(PFM_REG_I64, "MS_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR0),
17793 +/* pmc26 */ PMC_D(PFM_REG_I64, "MS_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR1),
17794 +/* pmc27 */ PMC_D(PFM_REG_I64, "FLAME_CCCR0", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR0),
17795 +/* pmc28 */ PMC_D(PFM_REG_I64, "FLAME_CCCR1", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR1),
17796 +/* pmc29 */ PMC_D(PFM_REG_I64, "IQ_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR0),
17797 +/* pmc30 */ PMC_D(PFM_REG_I64, "IQ_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR1),
17798 +/* pmc31 */ PMC_D(PFM_REG_I64, "IQ_CCCR4" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR4),
17799 + /* No HT extension (set to PFM_REG_NA by pfm_p4_probe_pmu() when HT is enabled) */
17800 +/* pmc32 */ PMC_D(PFM_REG_I, "BPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR1),
17801 +/* pmc33 */ PMC_D(PFM_REG_I, "IS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR1),
17802 +/* pmc34 */ PMC_D(PFM_REG_I, "MOB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR1),
17803 +/* pmc35 */ PMC_D(PFM_REG_I, "ITLB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR1),
17804 +/* pmc36 */ PMC_D(PFM_REG_I, "PMH_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR1),
17805 +/* pmc37 */ PMC_D(PFM_REG_I, "IX_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR1),
17806 +/* pmc38 */ PMC_D(PFM_REG_I, "FSB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR1),
17807 +/* pmc39 */ PMC_D(PFM_REG_I, "BSU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR1),
17808 +/* pmc40 */ PMC_D(PFM_REG_I, "MS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR1),
17809 +/* pmc41 */ PMC_D(PFM_REG_I, "TC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR1),
17810 +/* pmc42 */ PMC_D(PFM_REG_I, "TBPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR1),
17811 +/* pmc43 */ PMC_D(PFM_REG_I, "FLAME_ESCR1", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR1),
17812 +/* pmc44 */ PMC_D(PFM_REG_I, "FIRM_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR1),
17813 +/* pmc45 */ PMC_D(PFM_REG_I, "SAAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR1),
17814 +/* pmc46 */ PMC_D(PFM_REG_I, "U2L_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR1),
17815 +/* pmc47 */ PMC_D(PFM_REG_I, "DAC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR1),
17816 +/* pmc48 */ PMC_D(PFM_REG_I, "IQ_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR1), /* only model 1 and 2 */
17817 +/* pmc49 */ PMC_D(PFM_REG_I, "ALF_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR1),
17818 +/* pmc50 */ PMC_D(PFM_REG_I, "RAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR1),
17819 +/* pmc51 */ PMC_D(PFM_REG_I, "CRU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR1),
17820 +/* pmc52 */ PMC_D(PFM_REG_I, "CRU_ESCR3" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR3),
17821 +/* pmc53 */ PMC_D(PFM_REG_I, "CRU_ESCR5" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR5),
17822 +/* pmc54 */ PMC_D(PFM_REG_I64, "BPU_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR2),
17823 +/* pmc55 */ PMC_D(PFM_REG_I64, "BPU_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR3),
17824 +/* pmc56 */ PMC_D(PFM_REG_I64, "MS_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR2),
17825 +/* pmc57 */ PMC_D(PFM_REG_I64, "MS_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR3),
17826 +/* pmc58 */ PMC_D(PFM_REG_I64, "FLAME_CCCR2", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR2),
17827 +/* pmc59 */ PMC_D(PFM_REG_I64, "FLAME_CCCR3", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR3),
17828 +/* pmc60 */ PMC_D(PFM_REG_I64, "IQ_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR2),
17829 +/* pmc61 */ PMC_D(PFM_REG_I64, "IQ_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR3),
17830 +/* pmc62 */ PMC_D(PFM_REG_I64, "IQ_CCCR5" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR5),
17831 +/* pmc63 */ PMC_D(PFM_REG_I, "PEBS_MATRIX_VERT", 0, 0xffffffffffffffecULL, 0, 0x3f2),
17832 +/* pmc64 */ PMC_D(PFM_REG_I, "PEBS_ENABLE", 0, 0xfffffffff8ffe000ULL, 0, 0x3f1)
17833 +};
17834 +#define PFM_P4_NUM_PMCS ARRAY_SIZE(pfm_p4_pmc_desc)
17835 +
17836 +/*
17837 + * See section 15.10.6.6 for details about the IQ block
17838 + */
17839 +static struct pfm_regmap_desc pfm_p4_pmd_desc[] = {
17840 +/* pmd0 */ PMD_D(PFM_REG_C, "BPU_CTR0", MSR_P4_BPU_PERFCTR0),
17841 +/* pmd1 */ PMD_D(PFM_REG_C, "BPU_CTR1", MSR_P4_BPU_PERFCTR1),
17842 +/* pmd2 */ PMD_D(PFM_REG_C, "MS_CTR0", MSR_P4_MS_PERFCTR0),
17843 +/* pmd3 */ PMD_D(PFM_REG_C, "MS_CTR1", MSR_P4_MS_PERFCTR1),
17844 +/* pmd4 */ PMD_D(PFM_REG_C, "FLAME_CTR0", MSR_P4_FLAME_PERFCTR0),
17845 +/* pmd5 */ PMD_D(PFM_REG_C, "FLAME_CTR1", MSR_P4_FLAME_PERFCTR1),
17846 +/* pmd6 */ PMD_D(PFM_REG_C, "IQ_CTR0", MSR_P4_IQ_PERFCTR0),
17847 +/* pmd7 */ PMD_D(PFM_REG_C, "IQ_CTR1", MSR_P4_IQ_PERFCTR1),
17848 +/* pmd8 */ PMD_D(PFM_REG_C, "IQ_CTR4", MSR_P4_IQ_PERFCTR4),
17849 + /* no HT extension */
17850 +/* pmd9 */ PMD_D(PFM_REG_C, "BPU_CTR2", MSR_P4_BPU_PERFCTR2),
17851 +/* pmd10 */ PMD_D(PFM_REG_C, "BPU_CTR3", MSR_P4_BPU_PERFCTR3),
17852 +/* pmd11 */ PMD_D(PFM_REG_C, "MS_CTR2", MSR_P4_MS_PERFCTR2),
17853 +/* pmd12 */ PMD_D(PFM_REG_C, "MS_CTR3", MSR_P4_MS_PERFCTR3),
17854 +/* pmd13 */ PMD_D(PFM_REG_C, "FLAME_CTR2", MSR_P4_FLAME_PERFCTR2),
17855 +/* pmd14 */ PMD_D(PFM_REG_C, "FLAME_CTR3", MSR_P4_FLAME_PERFCTR3),
17856 +/* pmd15 */ PMD_D(PFM_REG_C, "IQ_CTR2", MSR_P4_IQ_PERFCTR2),
17857 +/* pmd16 */ PMD_D(PFM_REG_C, "IQ_CTR3", MSR_P4_IQ_PERFCTR3),
17858 +/* pmd17 */ PMD_D(PFM_REG_C, "IQ_CTR5", MSR_P4_IQ_PERFCTR5)
17859 +};
17860 +#define PFM_P4_NUM_PMDS ARRAY_SIZE(pfm_p4_pmd_desc)
17861 +
17862 +/*
17863 + * Due to hotplug CPU support, threads may not necessarily
17864 + * be activated at the time the module is inserted. We need
17865 + * to check whether they could be activated by looking at
17866 + * the present CPU (present != online).
17867 + */
17868 +static int pfm_p4_probe_pmu(void)
17869 +{
17870 + unsigned int i;
17871 + int ht_enabled;
17872 +
17873 + /*
17874 + * only works on Intel processors
17875 + */
17876 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
17877 + PFM_INFO("not running on Intel processor");
17878 + return -1;
17879 + }
17880 +
17881 + if (current_cpu_data.x86 != 15) {
17882 + PFM_INFO("unsupported family=%d", current_cpu_data.x86);
17883 + return -1;
17884 + }
17885 +
17886 + switch (current_cpu_data.x86_model) {
17887 + case 0 ... 2:
17888 + break;
17889 + case 3 ... 6:
17890 + /*
17891 + * IQ_ESCR0, IQ_ESCR1 only present on model 1, 2
17892 + */
17893 + pfm_p4_pmc_desc[16].type = PFM_REG_NA;
17894 + pfm_p4_pmc_desc[48].type = PFM_REG_NA;
17895 + break;
17896 + default:
17897 + /*
17898 + * do not know if they all work the same, so reject
17899 + * for now
17900 + */
17901 + if (!force) {
17902 + PFM_INFO("unsupported model %d",
17903 + current_cpu_data.x86_model);
17904 + return -1;
17905 + }
17906 + }
17907 +
17908 + /*
17909 + * check for local APIC (required)
17910 + */
17911 + if (!cpu_has_apic) {
17912 + PFM_INFO("no local APIC, unsupported");
17913 + return -1;
17914 + }
17915 +#ifdef CONFIG_SMP
17916 + ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map))
17917 + / current_cpu_data.x86_max_cores) > 1;
17918 +#else
17919 + ht_enabled = 0;
17920 +#endif
17921 + if (cpu_has_ht) {
17922 +
17923 + PFM_INFO("HyperThreading supported, status %s",
17924 + ht_enabled ? "on": "off");
17925 + /*
17926 + * disable registers not supporting HT
17927 + */
17928 + if (ht_enabled) {
17929 + PFM_INFO("disabling half the registers for HT");
17930 + for (i = 0; i < PFM_P4_NUM_PMCS; i++) {
17931 + if (pmc_addrs[(i)].reg_type & PFM_REGT_NOHT)
17932 + pfm_p4_pmc_desc[i].type = PFM_REG_NA;
17933 + }
17934 + for (i = 0; i < PFM_P4_NUM_PMDS; i++) {
17935 + if (pmd_addrs[(i)].reg_type & PFM_REGT_NOHT)
17936 + pfm_p4_pmd_desc[i].type = PFM_REG_NA;
17937 + }
17938 + }
17939 + }
17940 +
17941 + if (cpu_has_ds) {
17942 + PFM_INFO("Data Save Area (DS) supported");
17943 +
17944 + if (cpu_has_pebs) {
17945 + /*
17946 + * PEBS does not work with HyperThreading enabled
17947 + */
17948 + if (ht_enabled)
17949 + PFM_INFO("PEBS supported, status off (because of HT)");
17950 + else
17951 + PFM_INFO("PEBS supported, status on");
17952 + }
17953 + }
17954 +
17955 + /*
17956 + * build enable mask
17957 + */
17958 + for (i = 0; i < PFM_P4_NUM_PMCS; i++) {
17959 + if (pmc_addrs[(i)].reg_type & PFM_REGT_EN) {
17960 + __set_bit(i, cast_ulp(enable_mask));
17961 + max_enable = i + 1;
17962 + }
17963 + }
17964 +
17965 + if (force_nmi)
17966 + pfm_p4_pmu_info.flags |= PFM_X86_FL_USE_NMI;
17967 + return 0;
17968 +}
17969 +static inline int get_smt_id(void)
17970 +{
17971 +#ifdef CONFIG_SMP
17972 + int cpu = smp_processor_id();
17973 + return (cpu != first_cpu(__get_cpu_var(cpu_sibling_map)));
17974 +#else
17975 + return 0;
17976 +#endif
17977 +}
17978 +
17979 +static void __pfm_write_reg_p4(const struct pfm_p4_regmap *xreg, u64 val)
17980 +{
17981 + u64 pmi;
17982 + int smt_id;
17983 +
17984 + smt_id = get_smt_id();
17985 + /*
17986 + * Write val to the MSR backing xreg for the current logical CPU;
17987 + * nothing is written if xreg has no MSR for that thread (addr 0).
17988 + * HT is only supported by P4-style PMU. Adjust for T1 if necessary:
17989 + *
17990 + * - move the T0_OS/T0_USR bits into T1 slots
17991 + * - move the OVF_PMI_T0 bits into T1 slot
17992 + *
17993 + * The P4/EM64T T1 is cleared by description table.
17994 + * User only works with T0.
17995 + */
17996 + if (smt_id) {
17997 + if (xreg->reg_type & PFM_REGT_ESCR) {
17998 +
17999 + /* copy T0_USR & T0_OS to T1 */
18000 + val |= ((val & 0xcULL) >> 2);
18001 +
18002 + /* clear bits T0_USR & T0_OS */
18003 + val &= ~0xcULL;
18004 +
18005 + } else if (xreg->reg_type & PFM_REGT_CCCR) {
18006 + pmi = (val >> 26) & 0x1;
18007 + if (pmi) {
18008 + val &= ~(1ULL<<26); /* ULL: a 32-bit UL mask would clear bits 32-63 of val */
18009 + val |= 1ULL<<27;
18010 + }
18011 + }
18012 + }
18013 + if (xreg->addrs[smt_id])
18014 + wrmsrl(xreg->addrs[smt_id], val);
18015 +}
18016 +
18017 +void __pfm_read_reg_p4(const struct pfm_p4_regmap *xreg, u64 *val)
18018 +{
18019 + int smt_id;
18020 +
18021 + smt_id = get_smt_id();
18022 + /* read the MSR backing xreg for this logical CPU; *val is 0 if there is none */
18023 + if (likely(xreg->addrs[smt_id])) {
18024 + rdmsrl(xreg->addrs[smt_id], *val);
18025 + /*
18026 + * HT is only supported by P4-style PMU
18027 + *
18028 + * move the Tx_OS and Tx_USR bits into
18029 + * T0 slots setting the T1 slots to zero
18030 + */
18031 + if (xreg->reg_type & PFM_REGT_ESCR) {
18032 + if (smt_id)
18033 + *val |= (((*val) & 0x3) << 2);
18034 +
18035 + /*
18036 + * zero out bits that are reserved (including T1_OS and
18037 + * T1_USR): reserved bits are 1 in PFM_ESCR_RSVD, hence the ~
18038 + */
18039 + *val &= ~PFM_ESCR_RSVD;
18040 + }
18041 + } else {
18042 + *val = 0;
18043 + }
18044 +}
18045 +static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value)
18046 +{
18047 + __pfm_write_reg_p4(&pmc_addrs[cnum], value);
18048 +}
18049 +
18050 +static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value)
18051 +{
18052 + __pfm_write_reg_p4(&pmd_addrs[cnum], value);
18053 +}
18054 +
18055 +static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum)
18056 +{
18057 + u64 tmp;
18058 + __pfm_read_reg_p4(&pmd_addrs[cnum], &tmp);
18059 + return tmp;
18060 +}
18061 +
18062 +static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum)
18063 +{
18064 + u64 tmp;
18065 + __pfm_read_reg_p4(&pmc_addrs[cnum], &tmp);
18066 + return tmp;
18067 +}
18068 +
18069 +struct pfm_ds_area_p4 {
18070 + unsigned long bts_buf_base;
18071 + unsigned long bts_index;
18072 + unsigned long bts_abs_max;
18073 + unsigned long bts_intr_thres;
18074 + unsigned long pebs_buf_base;
18075 + unsigned long pebs_index;
18076 + unsigned long pebs_abs_max;
18077 + unsigned long pebs_intr_thres;
18078 + u64 pebs_cnt_reset;
18079 +};
18080 +
18081 +
18082 +static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
18083 +{
18084 + struct pfm_arch_pmu_info *pmu_info;
18085 + struct pfm_arch_context *ctx_arch;
18086 + struct pfm_ds_area_p4 *ds = NULL;
18087 + u64 used_mask[PFM_PMC_BV];
18088 + u16 i, j, count, pebs_idx = ~0;
18089 + u16 max_pmc;
18090 + u64 cccr, ctr1, ctr2, ovfl_mask;
18091 +
18092 + pmu_info = &pfm_p4_pmu_info;
18093 + ctx_arch = pfm_ctx_arch(ctx);
18094 + max_pmc = ctx->regs.max_pmc;
18095 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
18096 +
18097 + /*
18098 + * build used enable PMC bitmask
18099 + * if user did not set any CCCR, then mask is
18100 + * empty and there is nothing to do because nothing
18101 + * was started
18102 + */
18103 + bitmap_and(cast_ulp(used_mask),
18104 + cast_ulp(set->used_pmcs),
18105 + cast_ulp(enable_mask),
18106 + max_enable);
18107 +
18108 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
18109 +
18110 + PFM_DBG_ovfl("npend=%u ena_mask=0x%llx u_pmcs=0x%llx count=%u num=%u",
18111 + set->npend_ovfls,
18112 + (unsigned long long)enable_mask[0],
18113 + (unsigned long long)set->used_pmcs[0],
18114 + count, max_enable);
18115 +
18116 + /*
18117 + * ensures we do not destroy pending overflow
18118 + * information. If pended interrupts are already
18119 + * known, then we just stop monitoring.
18120 + */
18121 + if (set->npend_ovfls) {
18122 + /*
18123 + * clear enable bit
18124 + * unfortunately, this is very expensive!
18125 + */
18126 + for (i = 0; count; i++) {
18127 + if (test_bit(i, cast_ulp(used_mask))) {
18128 + __pfm_write_reg_p4(pmc_addrs+i, 0);
18129 + count--;
18130 + }
18131 + }
18132 + /* need save PMDs at upper level */
18133 + return 1;
18134 + }
18135 +
18136 + if (ctx_arch->flags.use_pebs) {
18137 + ds = ctx_arch->ds_area;
18138 + pebs_idx = PEBS_PMD;
18139 + PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx",
18140 + ds,
18141 + (unsigned long long)ds->pebs_index,
18142 + (unsigned long long)ds->pebs_intr_thres);
18143 + }
18144 +
18145 + /*
18146 + * stop monitoring AND collect pending overflow information AND
18147 + * save pmds.
18148 + *
18149 + * We need to access the CCCR twice, once to get overflow info
18150 + * and a second to stop monitoring (which destroys the OVF flag)
18151 + * Similarly, we need to read the counter twice to check whether
18152 + * it did overflow between the CCCR read and the CCCR write.
18153 + */
18154 + for (i = 0; count; i++) {
18155 + if (i != pebs_idx && test_bit(i, cast_ulp(used_mask))) {
18156 + /*
18157 + * controlled counter
18158 + */
18159 + j = pmc_addrs[i].ctr;
18160 +
18161 + /* read CCCR (PMC) value */
18162 + __pfm_read_reg_p4(pmc_addrs+i, &cccr);
18163 +
18164 + /* read counter (PMD) controlled by PMC */
18165 + __pfm_read_reg_p4(pmd_addrs+j, &ctr1);
18166 +
18167 + /* clear CCCR value: stop counter but destroy OVF */
18168 + __pfm_write_reg_p4(pmc_addrs+i, 0);
18169 +
18170 + /* read counter controlled by CCCR again */
18171 + __pfm_read_reg_p4(pmd_addrs+j, &ctr2);
18172 +
18173 + /*
18174 + * there is an overflow if either:
18175 + * - CCCR.ovf is set (and we just cleared it)
18176 + * - ctr2 < ctr1
18177 + * in that case we set the bit corresponding to the
18178 + * overflowed PMD in povfl_pmds.
18179 + */
18180 + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
18181 + __set_bit(j, cast_ulp(set->povfl_pmds));
18182 + set->npend_ovfls++;
18183 + }
18184 + ctr2 = (set->pmds[j].value & ~ovfl_mask) | (ctr2 & ovfl_mask);
18185 + set->pmds[j].value = ctr2;
18186 + count--;
18187 + }
18188 + }
18189 + /*
18190 + * check for PEBS buffer full and set the corresponding PMD overflow
18191 + */
18192 + if (ctx_arch->flags.use_pebs) {
18193 + PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", ds, ds->pebs_index, ds->pebs_intr_thres);
18194 + if (ds->pebs_index >= ds->pebs_intr_thres
18195 + && test_bit(PEBS_PMD, cast_ulp(set->used_pmds))) {
18196 + __set_bit(PEBS_PMD, cast_ulp(set->povfl_pmds));
18197 + set->npend_ovfls++;
18198 + }
18199 + }
18200 + /* 0 means: no need to save the PMD at higher level */
18201 + return 0;
18202 +}
18203 +
18204 +static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags)
18205 +{
18206 + struct pfm_arch_context *ctx_arch;
18207 +
18208 + ctx_arch = pfm_ctx_arch(ctx);
18209 +
18210 + ctx_arch->data = kzalloc(sizeof(struct pfm_arch_p4_context), GFP_KERNEL);
18211 + if (!ctx_arch->data)
18212 + return -ENOMEM;
18213 +
18214 + return 0;
18215 +}
18216 +
18217 +static void pfm_p4_free_context(struct pfm_context *ctx)
18218 +{
18219 + struct pfm_arch_context *ctx_arch;
18220 +
18221 + ctx_arch = pfm_ctx_arch(ctx);
18222 + /*
18223 + * we do not check if P4, because it would be NULL and
18224 + * kfree can deal with NULL
18225 + */
18226 + kfree(ctx_arch->data);
18227 +}
18228 +
18229 +/*
18230 + * detect if counters have overflowed (Intel P4, NMI path). Stops the
18231 + * enabled counters and records overflows in the P4 arch context; if
18232 + * none overflowed, the saved CCCRs are written back to resume.
18233 + * return:
18234 + * 0 : no overflow
18235 + * 1 : at least one overflow
18236 + */
18237 +static int __kprobes pfm_p4_has_ovfls(struct pfm_context *ctx)
18238 +{
18239 + struct pfm_arch_pmu_info *pmu_info;
18240 + struct pfm_p4_regmap *xrc, *xrd;
18241 + struct pfm_arch_context *ctx_arch;
18242 + struct pfm_arch_p4_context *p4;
18243 + u64 ena_mask[PFM_PMC_BV];
18244 + u64 cccr, ctr1, ctr2;
18245 + int n, i, j;
18246 +
18247 + pmu_info = &pfm_p4_pmu_info;
18248 +
18249 + ctx_arch = pfm_ctx_arch(ctx);
18250 + xrc = pmc_addrs;
18251 + xrd = pmd_addrs;
18252 + p4 = ctx_arch->data;
18253 +
18254 + bitmap_and(cast_ulp(ena_mask),
18255 + cast_ulp(ctx->regs.pmcs),
18256 + cast_ulp(enable_mask),
18257 + max_enable);
18258 +
18259 + n = bitmap_weight(cast_ulp(ena_mask), max_enable);
18260 +
18261 + for (i = 0; n; i++) {
18262 + if (!test_bit(i, cast_ulp(ena_mask)))
18263 + continue;
18264 + /*
18265 + * controlled counter
18266 + */
18267 + j = xrc[i].ctr;
18268 +
18269 + /* read CCCR (PMC) value */
18270 + __pfm_read_reg_p4(xrc+i, &cccr);
18271 +
18272 + /* read counter (PMD) controlled by PMC */
18273 + __pfm_read_reg_p4(xrd+j, &ctr1);
18274 +
18275 + /* clear CCCR value: stop counter but destroy OVF */
18276 + __pfm_write_reg_p4(xrc+i, 0);
18277 +
18278 + /* read counter controlled by CCCR again */
18279 + __pfm_read_reg_p4(xrd+j, &ctr2);
18280 +
18281 + /*
18282 + * there is an overflow if either:
18283 + * - CCCR.ovf is set (and we just cleared it)
18284 + * - ctr2 < ctr1
18285 + * in that case we set the bit corresponding to the
18286 + * overflowed PMD in povfl_pmds.
18287 + */
18288 + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
18289 + __set_bit(j, cast_ulp(p4->povfl_pmds));
18290 + p4->npend_ovfls++;
18291 + }
18292 + p4->saved_cccrs[i] = cccr;
18293 + n--;
18294 + }
18295 + /*
18296 + * no overflow means the NMI was not for us: resume monitoring by
18297 + * writing back each CCCR saved above (n is 0 here, so scan by index)
18298 + */
18299 + if (unlikely(!p4->npend_ovfls)) {
18300 + for (i = 0; i < max_enable; i++) {
18301 + if (!test_bit(i, cast_ulp(ena_mask)))
18302 + continue;
18303 + __pfm_write_reg_p4(xrc+i, p4->saved_cccrs[i]);
18304 + }
18305 + }
18306 + return p4->npend_ovfls ? 1 : 0;
18307 +}
18308 +
18309 +void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
18310 +{
18311 + struct pfm_arch_pmu_info *pmu_info;
18312 + struct pfm_arch_context *ctx_arch;
18313 + u64 *mask;
18314 + u16 i, num;
18315 +
18316 + ctx_arch = pfm_ctx_arch(ctx);
18317 + pmu_info = pfm_pmu_info();
18318 +
18319 + /*
18320 + * must restore DS pointer before restoring PMCs
18321 + * as this can potentially reactivate monitoring
18322 + */
18323 + if (ctx_arch->flags.use_ds)
18324 + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
18325 +
18326 + /*
18327 + * must restore everything because there are some dependencies
18328 + * (e.g., ESCR and CCCR)
18329 + */
18330 + num = ctx->regs.num_pmcs;
18331 + mask = ctx->regs.pmcs;
18332 + for (i = 0; num; i++) {
18333 + if (test_bit(i, cast_ulp(mask))) {
18334 + pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
18335 + num--;
18336 + }
18337 + }
18338 +}
18339 +
18340 +/*
18341 + * invoked only when NMI is used. Called from the LOCAL_PERFMON_VECTOR
18342 + * handler to copy P4 overflow state captured when the NMI triggered.
18343 + * Given that on P4, stopping monitoring destroys the overflow information,
18344 + * we save it in pfm_p4_has_ovfls() where monitoring is also stopped.
18345 + *
18346 + * Here we propagate the overflow state to the current active set. The
18347 + * freeze_pmu() call will not overwrite this state because npend_ovfls
18348 + * is non-zero.
18349 + */
18350 +static void pfm_p4_nmi_copy_state(struct pfm_context *ctx)
18351 +{
18352 + struct pfm_arch_context *ctx_arch;
18353 + struct pfm_event_set *set;
18354 + struct pfm_arch_p4_context *p4;
18355 +
18356 + ctx_arch = pfm_ctx_arch(ctx);
18357 + p4 = ctx_arch->data;
18358 + set = ctx->active_set;
18359 +
18360 + if (p4->npend_ovfls) {
18361 + set->npend_ovfls = p4->npend_ovfls;
18362 +
18363 + bitmap_copy(cast_ulp(set->povfl_pmds),
18364 + cast_ulp(p4->povfl_pmds),
18365 + ctx->regs.max_pmd);
18366 +
18367 + p4->npend_ovfls = 0;
18368 + }
18369 +}
18370 +
18371 +/**
18372 + * pfm_p4_quiesce - stop monitoring without grabbing any lock
18373 + *
18374 + * called from NMI interrupt handler to immediately stop monitoring
18375 + * cannot grab any lock, including perfmon related locks
18376 + */
18377 +static void __kprobes pfm_p4_quiesce(void)
18378 +{
18379 + u16 i;
18380 + /*
18381 + * quiesce PMU by clearing available registers that have
18382 + * the start/stop capability
18383 + */
18384 + for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) {
18385 + if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))
18386 + && test_bit(i, cast_ulp(enable_mask)))
18387 + __pfm_write_reg_p4(pmc_addrs+i, 0);
18388 + }
18389 +}
18390 +
18391 +
18392 +static struct pfm_pmu_config pfm_p4_pmu_conf = {
18393 + .pmu_name = "Intel P4",
18394 + .counter_width = 40,
18395 + .pmd_desc = pfm_p4_pmd_desc,
18396 + .pmc_desc = pfm_p4_pmc_desc,
18397 + .num_pmc_entries = PFM_P4_NUM_PMCS,
18398 + .num_pmd_entries = PFM_P4_NUM_PMDS,
18399 + .probe_pmu = pfm_p4_probe_pmu,
18400 + .version = "1.0",
18401 + .flags = PFM_PMU_BUILTIN_FLAG,
18402 + .owner = THIS_MODULE,
18403 + .pmu_info = &pfm_p4_pmu_info
18404 +};
18405 +
18406 +static int __init pfm_p4_pmu_init_module(void)
18407 +{
18408 + return pfm_pmu_register(&pfm_p4_pmu_conf);
18409 +}
18410 +
18411 +static void __exit pfm_p4_pmu_cleanup_module(void)
18412 +{
18413 + pfm_pmu_unregister(&pfm_p4_pmu_conf);
18414 +}
18415 +
18416 +module_init(pfm_p4_pmu_init_module);
18417 +module_exit(pfm_p4_pmu_cleanup_module);
18418 diff --git a/arch/x86/perfmon/perfmon_p6.c b/arch/x86/perfmon/perfmon_p6.c
18419 new file mode 100644
18420 index 0000000..47c0a46
18421 --- /dev/null
18422 +++ b/arch/x86/perfmon/perfmon_p6.c
18423 @@ -0,0 +1,310 @@
18424 +/*
18425 + * This file contains the P6 family processor PMU register description tables
18426 + *
18427 + * This module supports original P6 processors
18428 + * (Pentium II, Pentium Pro, Pentium III) and Pentium M.
18429 + *
18430 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
18431 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
18432 + *
18433 + * This program is free software; you can redistribute it and/or
18434 + * modify it under the terms of version 2 of the GNU General Public
18435 + * License as published by the Free Software Foundation.
18436 + *
18437 + * This program is distributed in the hope that it will be useful,
18438 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18439 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18440 + * General Public License for more details.
18441 + *
18442 + * You should have received a copy of the GNU General Public License
18443 + * along with this program; if not, write to the Free Software
18444 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18445 + * 02111-1307 USA
18446 + */
18447 +#include <linux/module.h>
18448 +#include <linux/kprobes.h>
18449 +#include <linux/perfmon_kern.h>
18450 +#include <linux/nmi.h>
18451 +#include <asm/msr.h>
18452 +
18453 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
18454 +MODULE_DESCRIPTION("P6 PMU description table");
18455 +MODULE_LICENSE("GPL");
18456 +
18457 +static int force_nmi;
18458 +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
18459 +module_param(force_nmi, bool, 0600);
18460 +
18461 +/*
18462 + * - upper 32 bits are reserved
18463 + * - INT: APIC enable bit is reserved (forced to 1)
18464 + * - bit 21 is reserved
18465 + * - bit 22 is reserved on PERFEVTSEL1
18466 + *
18467 + * RSVD: reserved bits are 1
18468 + */
18469 +#define PFM_P6_PMC0_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21))
18470 +#define PFM_P6_PMC1_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (3ULL<<21))
18471 +
18472 +/*
18473 + * force Local APIC interrupt on overflow
18474 + * disable with NO_EMUL64
18475 + */
18476 +#define PFM_P6_PMC_VAL (1ULL<<20)
18477 +#define PFM_P6_NO64 (1ULL<<20)
18478 +
18479 +
18480 +static void __kprobes pfm_p6_quiesce(void);
18481 +static int pfm_p6_has_ovfls(struct pfm_context *ctx);
18482 +static int pfm_p6_stop_save(struct pfm_context *ctx,
18483 + struct pfm_event_set *set);
18484 +
18485 +static u64 enable_mask[PFM_MAX_PMCS];
18486 +static u16 max_enable;
18487 +
18488 +/*
18489 + * PFM_X86_FL_NO_SHARING: because of the single enable bit on MSR_P6_EVNTSEL0
18490 + * the PMU cannot be shared with NMI watchdog or Oprofile
18491 + */
18492 +struct pfm_arch_pmu_info pfm_p6_pmu_info = {
18493 + .stop_save = pfm_p6_stop_save,
18494 + .has_ovfls = pfm_p6_has_ovfls,
18495 + .quiesce = pfm_p6_quiesce,
18496 + .flags = PFM_X86_FL_NO_SHARING,
18497 +};
18498 +
18499 +static struct pfm_regmap_desc pfm_p6_pmc_desc[] = {
18500 +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFEVTSEL0", PFM_P6_PMC_VAL, PFM_P6_PMC0_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL0),
18501 +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFEVTSEL1", PFM_P6_PMC_VAL, PFM_P6_PMC1_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL1)
18502 +};
18503 +#define PFM_P6_NUM_PMCS ARRAY_SIZE(pfm_p6_pmc_desc)
18504 +
18505 +#define PFM_P6_D(n) \
18506 + { .type = PFM_REG_C, \
18507 + .desc = "PERFCTR"#n, \
18508 + .hw_addr = MSR_P6_PERFCTR0+n, \
18509 + .rsvd_msk = 0, \
18510 + .dep_pmcs[0] = 1ULL << n \
18511 + }
18512 +
18513 +static struct pfm_regmap_desc pfm_p6_pmd_desc[] = {
18514 +/* pmd0 */ PFM_P6_D(0),
18515 +/* pmd1 */ PFM_P6_D(1)
18516 +};
18517 +#define PFM_P6_NUM_PMDS ARRAY_SIZE(pfm_p6_pmd_desc)
18518 +
18519 +static int pfm_p6_probe_pmu(void)
18520 +{
18521 + int high, low;
18522 +
18523 + if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
18524 + PFM_INFO("not an Intel processor");
18525 + return -1;
18526 + }
18527 +
18528 + /*
18529 + * check for P6 processor family
18530 + */
18531 + if (current_cpu_data.x86 != 6) {
18532 + PFM_INFO("unsupported family=%d", current_cpu_data.x86);
18533 + return -1;
18534 + }
18535 +
18536 + switch (current_cpu_data.x86_model) {
18537 + case 1: /* Pentium Pro */
18538 + case 3:
18539 + case 5: /* Pentium II Deschutes */
18540 + case 7 ... 11:
18541 + break;
18542 + case 13:
18543 + /* for Pentium M, we need to check if PMU exist */
18544 + rdmsr(MSR_IA32_MISC_ENABLE, low, high);
18545 + if (low & (1U << 7))
18546 + break;
18547 + default:
18548 + PFM_INFO("unsupported CPU model %d",
18549 + current_cpu_data.x86_model);
18550 + return -1;
18551 +
18552 + }
18553 +
18554 + if (!cpu_has_apic) {
18555 + PFM_INFO("no Local APIC, try rebooting with lapic");
18556 + return -1;
18557 + }
18558 + __set_bit(0, cast_ulp(enable_mask));
18559 + __set_bit(1, cast_ulp(enable_mask));
18560 + max_enable = 1 + 1;
18561 + /*
18562 + * force NMI interrupt?
18563 + */
18564 + if (force_nmi)
18565 + pfm_p6_pmu_info.flags |= PFM_X86_FL_USE_NMI;
18566 +
18567 + return 0;
18568 +}
18569 +
18570 +/**
18571 + * pfm_p6_has_ovfls - check for pending overflow condition
18572 + * @ctx: context to work on
18573 + *
18574 + * detect if counters have overflowed.
18575 + * return:
18576 + * 0 : no overflow
18577 + * 1 : at least one overflow
18578 + */
18579 +static int __kprobes pfm_p6_has_ovfls(struct pfm_context *ctx)
18580 +{
18581 + u64 *cnt_mask;
18582 + u64 wmask, val;
18583 + u16 i, num;
18584 +
18585 + cnt_mask = ctx->regs.cnt_pmds;
18586 + num = ctx->regs.num_counters;
18587 + wmask = 1ULL << pfm_pmu_conf->counter_width;
18588 +
18589 + /*
18590 + * we can leverage the fact that we know the mapping
18591 + * to hardcode the MSR address and avoid accessing
18592 + * more cachelines
18593 + *
18594 + * We need to check cnt_mask because not all registers
18595 + * may be available.
18596 + */
18597 + for (i = 0; num; i++) {
18598 + if (test_bit(i, cast_ulp(cnt_mask))) {
18599 + rdmsrl(MSR_P6_PERFCTR0+i, val);
18600 + if (!(val & wmask))
18601 + return 1;
18602 + num--;
18603 + }
18604 + }
18605 + return 0;
18606 +}
18607 +
18608 +/**
18609 + * pfm_p6_stop_save -- stop monitoring and save PMD values
18610 + * @ctx: context to work on
18611 + * @set: current event set
18612 + *
18613 + * return value:
18614 + * 0 - no need to save PMDs in caller
18615 + * 1 - need to save PMDs in caller
18616 + */
18617 +static int pfm_p6_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
18618 +{
18619 + struct pfm_arch_pmu_info *pmu_info;
18620 + u64 used_mask[PFM_PMC_BV];
18621 + u64 *cnt_pmds;
18622 + u64 val, wmask, ovfl_mask;
18623 + u32 i, count;
18624 +
18625 + pmu_info = pfm_pmu_info();
18626 +
18627 + wmask = 1ULL << pfm_pmu_conf->counter_width;
18628 + bitmap_and(cast_ulp(used_mask),
18629 + cast_ulp(set->used_pmcs),
18630 + cast_ulp(enable_mask),
18631 + max_enable);
18632 + /* used_mask is only computed for max_enable bits: count over those */
18633 + count = bitmap_weight(cast_ulp(used_mask), max_enable);
18634 +
18635 + /*
18636 + * stop monitoring
18637 + * Unfortunately, this is very expensive!
18638 + * wrmsrl() is serializing.
18639 + */
18640 + for (i = 0; count; i++) {
18641 + if (test_bit(i, cast_ulp(used_mask))) {
18642 + wrmsrl(MSR_P6_EVNTSEL0+i, 0);
18643 + count--;
18644 + }
18645 + }
18646 +
18647 + /*
18648 + * if we already have a pending overflow condition, we simply
18649 + * return to take care of this first.
18650 + */
18651 + if (set->npend_ovfls)
18652 + return 1;
18653 +
18654 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
18655 + cnt_pmds = ctx->regs.cnt_pmds;
18656 +
18657 + /*
18658 + * check for pending overflows and save PMDs (combo)
18659 + * we employ used_pmds because we also need to save
18660 + * and not just check for pending interrupts.
18661 + *
18662 + * Must check for counting PMDs because of virtual PMDs
18663 + */
18664 + count = set->nused_pmds;
18665 + for (i = 0; count; i++) {
18666 + if (test_bit(i, cast_ulp(set->used_pmds))) {
18667 + val = pfm_arch_read_pmd(ctx, i);
18668 + if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
18669 + if (!(val & wmask)) {
18670 + __set_bit(i, cast_ulp(set->povfl_pmds));
18671 + set->npend_ovfls++;
18672 + }
18673 + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask);
18674 + }
18675 + set->pmds[i].value = val;
18676 + count--;
18677 + }
18678 + }
18679 + /* 0 means: no need to save PMDs at upper level */
18680 + return 0;
18681 +}
18682 +
18683 +/**
18684 + * pfm_p6_quiesce -- stop monitoring without grabbing any lock
18685 + *
18686 + * called from NMI interrupt handler to immediately stop monitoring
18687 + * cannot grab any lock, including perfmon related locks
18688 + */
18689 +static void __kprobes pfm_p6_quiesce(void)
18690 +{
18691 + /*
18692 + * quiesce PMU by clearing available registers that have
18693 + * the start/stop capability
18694 + *
18695 + * P6 processors only have enable bit on PERFEVTSEL0
18696 + */
18697 + if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
18698 + wrmsrl(MSR_P6_EVNTSEL0, 0);
18699 +}
18700 +
18701 +/*
18702 + * Counters have 40 bits implemented. However they are designed such
18703 + * that bits [32-39] are sign extensions of bit 31. As such the
18704 + * effective width of a counter for P6-like PMU is 31 bits only.
18705 + *
18706 + * See IA-32 Intel Architecture Software developer manual Vol 3B
18707 + */
18708 +static struct pfm_pmu_config pfm_p6_pmu_conf = {
18709 + .pmu_name = "Intel P6 processor Family",
18710 + .counter_width = 31,
18711 + .pmd_desc = pfm_p6_pmd_desc,
18712 + .pmc_desc = pfm_p6_pmc_desc,
18713 + .num_pmc_entries = PFM_P6_NUM_PMCS,
18714 + .num_pmd_entries = PFM_P6_NUM_PMDS,
18715 + .probe_pmu = pfm_p6_probe_pmu,
18716 + .version = "1.0",
18717 + .flags = PFM_PMU_BUILTIN_FLAG,
18718 + .owner = THIS_MODULE,
18719 + .pmu_info = &pfm_p6_pmu_info
18720 +};
18721 +
18722 +static int __init pfm_p6_pmu_init_module(void)
18723 +{
18724 + return pfm_pmu_register(&pfm_p6_pmu_conf);
18725 +}
18726 +
18727 +static void __exit pfm_p6_pmu_cleanup_module(void)
18728 +{
18729 + pfm_pmu_unregister(&pfm_p6_pmu_conf);
18730 +}
18731 +
18732 +module_init(pfm_p6_pmu_init_module);
18733 +module_exit(pfm_p6_pmu_cleanup_module);
18734 diff --git a/arch/x86/perfmon/perfmon_pebs_core_smpl.c b/arch/x86/perfmon/perfmon_pebs_core_smpl.c
18735 new file mode 100644
18736 index 0000000..eeb9174
18737 --- /dev/null
18738 +++ b/arch/x86/perfmon/perfmon_pebs_core_smpl.c
18739 @@ -0,0 +1,256 @@
18740 +/*
18741 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
18742 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
18743 + *
18744 + * This file implements the Precise Event Based Sampling (PEBS)
18745 + * sampling format for Intel Core and Atom processors.
18746 + *
18747 + * This program is free software; you can redistribute it and/or
18748 + * modify it under the terms of version 2 of the GNU General Public
18749 + * License as published by the Free Software Foundation.
18750 + *
18751 + * This program is distributed in the hope that it will be useful,
18752 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
18753 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18754 + * General Public License for more details.
18755 + *
18756 + * You should have received a copy of the GNU General Public License
18757 + * along with this program; if not, write to the Free Software
18758 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18759 + * 02111-1307 USA
18760 + */
18761 +#include <linux/kernel.h>
18762 +#include <linux/types.h>
18763 +#include <linux/module.h>
18764 +#include <linux/init.h>
18765 +#include <linux/smp.h>
18766 +#include <linux/perfmon_kern.h>
18767 +
18768 +#include <asm/msr.h>
18769 +#include <asm/perfmon_pebs_core_smpl.h>
18770 +
18771 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
18772 +MODULE_DESCRIPTION("Intel Core Precise Event-Based Sampling (PEBS)");
18773 +MODULE_LICENSE("GPL");
18774 +
18775 +/* round a up to the next multiple of 2^order; fully parenthesized */
18776 +#define ALIGN_PEBS(a, order) \
18777 + (((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1))
18778 +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */
18779 +
18780 +static int pfm_pebs_core_fmt_validate(u32 flags, u16 npmds, void *data)
18781 +{
18782 + struct pfm_pebs_core_smpl_arg *arg = data;
18783 + size_t min_buf_size;
18784 +
18785 + /*
18786 + * need to define at least the size of the buffer
18787 + */
18788 + if (data == NULL) {
18789 + PFM_DBG("no argument passed");
18790 + return -EINVAL;
18791 + }
18792 +
18793 + /*
18794 + * compute min buf size. npmds is the maximum number
18795 + * of implemented PMD registers.
18796 + */
18797 + min_buf_size = sizeof(struct pfm_pebs_core_smpl_hdr)
18798 + + sizeof(struct pfm_pebs_core_smpl_entry)
18799 + + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */
18800 +
18801 + PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
18802 + flags,
18803 + min_buf_size,
18804 + arg->buf_size);
18805 +
18806 + /*
18807 + * must hold at least the buffer header + one minimally sized entry
18808 + */
18809 + if (arg->buf_size < min_buf_size)
18810 + return -EINVAL;
18811 +
18812 + return 0;
18813 +}
18814 +
18815 +static int pfm_pebs_core_fmt_get_size(unsigned int flags, void *data, size_t *size)
18816 +{
18817 + struct pfm_pebs_core_smpl_arg *arg = data;
18818 +
18819 + /*
18820 + * size has been validated in pfm_pebs_core_fmt_validate()
18821 + */
18822 + *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER);
18823 +
18824 + return 0;
18825 +}
18826 +
18827 +static int pfm_pebs_core_fmt_init(struct pfm_context *ctx, void *buf,
18828 + u32 flags, u16 npmds, void *data)
18829 +{
18830 + struct pfm_arch_context *ctx_arch;
18831 + struct pfm_pebs_core_smpl_hdr *hdr;
18832 + struct pfm_pebs_core_smpl_arg *arg = data;
18833 + u64 pebs_start, pebs_end;
18834 + struct pfm_ds_area_core *ds;
18835 +
18836 + ctx_arch = pfm_ctx_arch(ctx);
18837 +
18838 + hdr = buf;
18839 + ds = &hdr->ds;
18840 +
18841 + /*
18842 + * align PEBS buffer base
18843 + */
18844 + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER);
18845 + pebs_end = pebs_start + arg->buf_size + 1;
18846 +
18847 + hdr->version = PFM_PEBS_CORE_SMPL_VERSION;
18848 + hdr->buf_size = arg->buf_size;
18849 + hdr->overflows = 0;
18850 +
18851 + /*
18852 + * express PEBS buffer base as offset from the end of the header
18853 + */
18854 + hdr->start_offs = pebs_start - (unsigned long)(hdr+1);
18855 +
18856 + /*
18857 + * PEBS buffer boundaries
18858 + */
18859 + ds->pebs_buf_base = pebs_start;
18860 + ds->pebs_abs_max = pebs_end;
18861 +
18862 + /*
18863 + * PEBS starting position
18864 + */
18865 + ds->pebs_index = pebs_start;
18866 +
18867 + /*
18868 + * PEBS interrupt threshold
18869 + */
18870 + ds->pebs_intr_thres = pebs_start
18871 + + arg->intr_thres
18872 + * sizeof(struct pfm_pebs_core_smpl_entry);
18873 +
18874 + /*
18875 + * save counter reset value for PEBS counter
18876 + */
18877 + ds->pebs_cnt_reset = arg->cnt_reset;
18878 +
18879 + /*
18880 + * keep track of DS AREA
18881 + */
18882 + ctx_arch->ds_area = ds;
18883 + ctx_arch->flags.use_ds = 1;
18884 + ctx_arch->flags.use_pebs = 1;
18885 +
18886 + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%llx "
18887 + "pebs_end=0x%llx ds=%p pebs_thres=0x%llx cnt_reset=0x%llx",
18888 + buf,
18889 + (unsigned long long)hdr->buf_size,
18890 + (unsigned long long)hdr->start_offs,
18891 + (unsigned long long)pebs_start,
18892 + (unsigned long long)pebs_end,
18893 + ds,
18894 + (unsigned long long)ds->pebs_intr_thres,
18895 + (unsigned long long)ds->pebs_cnt_reset);
18896 +
18897 + return 0;
18898 +}
18899 +
18900 +static int pfm_pebs_core_fmt_handler(struct pfm_context *ctx,
18901 + unsigned long ip, u64 tstamp, void *data)
18902 +{
18903 + struct pfm_pebs_core_smpl_hdr *hdr;
18904 + struct pfm_ovfl_arg *arg;
18905 +
18906 + hdr = ctx->smpl_addr;
18907 + arg = &ctx->ovfl_arg;
18908 +
18909 + PFM_DBG_ovfl("buffer full");
18910 + /*
18911 + * increment number of buffer overflows.
18912 + * important to detect duplicate set of samples.
18913 + */
18914 + hdr->overflows++;
18915 +
18916 + /*
18917 + * request notification and masking of monitoring.
18918 + * Notification is still subject to the overflowed
18919 + * register having the FL_NOTIFY flag set.
18920 + */
18921 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
18922 +
18923 + return -ENOBUFS; /* we are full, sorry */
18924 +}
18925 +
18926 +static int pfm_pebs_core_fmt_restart(int is_active, u32 *ovfl_ctrl,
18927 + void *buf)
18928 +{
18929 + struct pfm_pebs_core_smpl_hdr *hdr = buf;
18930 +
18931 + /*
18932 + * reset index to base of buffer
18933 + */
18934 + hdr->ds.pebs_index = hdr->ds.pebs_buf_base;
18935 +
18936 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
18937 +
18938 + return 0;
18939 +}
18940 +
18941 +static int pfm_pebs_core_fmt_exit(void *buf)
18942 +{
18943 + return 0;
18944 +}
18945 +
18946 +static struct pfm_smpl_fmt pebs_core_fmt = {
18947 + .fmt_name = PFM_PEBS_CORE_SMPL_NAME,
18948 + .fmt_version = 0x1,
18949 + .fmt_arg_size = sizeof(struct pfm_pebs_core_smpl_arg),
18950 + .fmt_validate = pfm_pebs_core_fmt_validate,
18951 + .fmt_getsize = pfm_pebs_core_fmt_get_size,
18952 + .fmt_init = pfm_pebs_core_fmt_init,
18953 + .fmt_handler = pfm_pebs_core_fmt_handler,
18954 + .fmt_restart = pfm_pebs_core_fmt_restart,
18955 + .fmt_exit = pfm_pebs_core_fmt_exit,
18956 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
18957 + .owner = THIS_MODULE,
18958 +};
18959 +
18960 +static int __init pfm_pebs_core_fmt_init_module(void)
18961 +{
18962 + if (!cpu_has_pebs) {
18963 + PFM_INFO("processor does not have PEBS support");
18964 + return -1;
18965 + }
18966 + /*
18967 + * cpu_has_pebs is not enough to identify Intel Core PEBS
18968 + * which is different from Pentium 4 PEBS. Therefore we do
18969 + * a more detailed check here
18970 + */
18971 + if (current_cpu_data.x86 != 6) {
18972 + PFM_INFO("not a supported Intel processor");
18973 + return -1;
18974 + }
18975 +
18976 + switch (current_cpu_data.x86_model) {
18977 + case 15: /* Merom */
18978 + case 23: /* Penryn */
18979 + case 28: /* Atom (Silverthorne) */
18980 + case 29: /* Dunnington */
18981 + break;
18982 + default:
18983 + PFM_INFO("not a supported Intel processor");
18984 + return -1;
18985 + }
18986 + return pfm_fmt_register(&pebs_core_fmt);
18987 +}
18988 +
18989 +static void __exit pfm_pebs_core_fmt_cleanup_module(void)
18990 +{
18991 + pfm_fmt_unregister(&pebs_core_fmt);
18992 +}
18993 +
18994 +module_init(pfm_pebs_core_fmt_init_module);
18995 +module_exit(pfm_pebs_core_fmt_cleanup_module);
18996 diff --git a/arch/x86/perfmon/perfmon_pebs_p4_smpl.c b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c
18997 new file mode 100644
18998 index 0000000..f4e9fd2
18999 --- /dev/null
19000 +++ b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c
19001 @@ -0,0 +1,253 @@
19002 +/*
19003 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
19004 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19005 + *
19006 + * This file implements the Precise Event Based Sampling (PEBS)
19007 + * sampling format. It supports the following processors:
19008 + * - 32-bit Pentium 4 or other Netburst-based processors
19009 + * - 64-bit Pentium 4 or other Netburst-based processors
19010 + *
19011 + * This program is free software; you can redistribute it and/or
19012 + * modify it under the terms of version 2 of the GNU General Public
19013 + * License as published by the Free Software Foundation.
19014 + *
19015 + * This program is distributed in the hope that it will be useful,
19016 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19017 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19018 + * General Public License for more details.
19019 + *
19020 + * You should have received a copy of the GNU General Public License
19021 + * along with this program; if not, write to the Free Software
19022 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19023 + * 02111-1307 USA
19024 + */
19025 +#include <linux/kernel.h>
19026 +#include <linux/types.h>
19027 +#include <linux/module.h>
19028 +#include <linux/init.h>
19029 +#include <linux/smp.h>
19030 +#include <linux/perfmon_kern.h>
19031 +
19032 +#include <asm/msr.h>
19033 +#include <asm/perfmon_pebs_p4_smpl.h>
19034 +
19035 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
19036 +MODULE_DESCRIPTION("Intel P4 Precise Event-Based Sampling (PEBS)");
19037 +MODULE_LICENSE("GPL");
19038 +
19039 +/* round a up to the next multiple of 2^order; fully parenthesized */
19040 +#define ALIGN_PEBS(a, order) \
19041 + (((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1))
19042 +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */
19043 +
19044 +static int pfm_pebs_p4_fmt_validate(u32 flags, u16 npmds, void *data)
19045 +{
19046 + struct pfm_pebs_p4_smpl_arg *arg = data;
19047 + size_t min_buf_size;
19048 +
19049 + /*
19050 + * need to define at least the size of the buffer
19051 + */
19052 + if (data == NULL) {
19053 + PFM_DBG("no argument passed");
19054 + return -EINVAL;
19055 + }
19056 +
19057 + /*
19058 + * compute min buf size. npmds is the maximum number
19059 + * of implemented PMD registers.
19060 + */
19061 + min_buf_size = sizeof(struct pfm_pebs_p4_smpl_hdr)
19062 + + sizeof(struct pfm_pebs_p4_smpl_entry)
19063 + + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */
19064 +
19065 + PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
19066 + flags,
19067 + min_buf_size,
19068 + arg->buf_size);
19069 +
19070 + /*
19071 + * must hold at least the buffer header + one minimally sized entry
19072 + */
19073 + if (arg->buf_size < min_buf_size)
19074 + return -EINVAL;
19075 +
19076 + return 0;
19077 +}
19078 +
19079 +static int pfm_pebs_p4_fmt_get_size(unsigned int flags, void *data, size_t *size)
19080 +{
19081 + struct pfm_pebs_p4_smpl_arg *arg = data;
19082 +
19083 + /*
19084 + * size has been validated in pfm_pebs_p4_fmt_validate()
19085 + */
19086 + *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER);
19087 +
19088 + return 0;
19089 +}
19090 +
19091 +static int pfm_pebs_p4_fmt_init(struct pfm_context *ctx, void *buf,
19092 + u32 flags, u16 npmds, void *data)
19093 +{
19094 + struct pfm_arch_context *ctx_arch;
19095 + struct pfm_pebs_p4_smpl_hdr *hdr;
19096 + struct pfm_pebs_p4_smpl_arg *arg = data;
19097 + unsigned long pebs_start, pebs_end;
19098 + struct pfm_ds_area_p4 *ds;
19099 +
19100 + ctx_arch = pfm_ctx_arch(ctx);
19101 +
19102 + hdr = buf;
19103 + ds = &hdr->ds;
19104 +
19105 + /*
19106 + * align PEBS buffer base
19107 + */
19108 + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER);
19109 + pebs_end = pebs_start + arg->buf_size + 1;
19110 +
19111 + hdr->version = PFM_PEBS_P4_SMPL_VERSION;
19112 + hdr->buf_size = arg->buf_size;
19113 + hdr->overflows = 0;
19114 +
19115 + /*
19116 + * express PEBS buffer base as offset from the end of the header
19117 + */
19118 + hdr->start_offs = pebs_start - (unsigned long)(hdr+1);
19119 +
19120 + /*
19121 + * PEBS buffer boundaries
19122 + */
19123 + ds->pebs_buf_base = pebs_start;
19124 + ds->pebs_abs_max = pebs_end;
19125 +
19126 + /*
19127 + * PEBS starting position
19128 + */
19129 + ds->pebs_index = pebs_start;
19130 +
19131 + /*
19132 + * PEBS interrupt threshold
19133 + */
19134 + ds->pebs_intr_thres = pebs_start
19135 + + arg->intr_thres * sizeof(struct pfm_pebs_p4_smpl_entry);
19136 +
19137 + /*
19138 + * save counter reset value for PEBS counter
19139 + */
19140 + ds->pebs_cnt_reset = arg->cnt_reset;
19141 +
19142 + /*
19143 + * keep track of DS AREA
19144 + */
19145 + ctx_arch->ds_area = ds;
19146 + ctx_arch->flags.use_pebs = 1;
19147 + ctx_arch->flags.use_ds = 1;
19148 +
19149 + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%lx "
19150 + "pebs_end=0x%lx ds=%p pebs_thres=0x%lx cnt_reset=0x%llx",
19151 + buf,
19152 + (unsigned long long)hdr->buf_size,
19153 + (unsigned long long)hdr->start_offs,
19154 + pebs_start,
19155 + pebs_end,
19156 + ds,
19157 + ds->pebs_intr_thres,
19158 + (unsigned long long)ds->pebs_cnt_reset);
19159 +
19160 + return 0;
19161 +}
19162 +
19163 +static int pfm_pebs_p4_fmt_handler(struct pfm_context *ctx,
19164 + unsigned long ip, u64 tstamp, void *data)
19165 +{
19166 + struct pfm_pebs_p4_smpl_hdr *hdr;
19167 + struct pfm_ovfl_arg *arg;
19168 +
19169 + hdr = ctx->smpl_addr;
19170 + arg = &ctx->ovfl_arg;
19171 +
19172 + PFM_DBG_ovfl("buffer full");
19173 + /*
19174 + * increment number of buffer overflows.
19175 + * important to detect duplicate set of samples.
19176 + */
19177 + hdr->overflows++;
19178 +
19179 + /*
19180 + * request notification and masking of monitoring.
19181 + * Notification is still subject to the overflowed
19182 + * register having the FL_NOTIFY flag set.
19183 + */
19184 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
19185 +
19186 + return -ENOBUFS; /* we are full, sorry */
19187 +}
19188 +
19189 +static int pfm_pebs_p4_fmt_restart(int is_active, u32 *ovfl_ctrl,
19190 + void *buf)
19191 +{
19192 + struct pfm_pebs_p4_smpl_hdr *hdr = buf;
19193 +
19194 + /*
19195 + * reset index to base of buffer
19196 + */
19197 + hdr->ds.pebs_index = hdr->ds.pebs_buf_base;
19198 +
19199 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
19200 +
19201 + return 0;
19202 +}
19203 +
/*
 * Exit callback of the P4 PEBS sampling format: nothing to do,
 * buf is ignored and 0 is returned.
 */
static int pfm_pebs_p4_fmt_exit(void *buf)
{
	return 0;
}
19208 +
19209 +static struct pfm_smpl_fmt pebs_p4_fmt = {
19210 + .fmt_name = PFM_PEBS_P4_SMPL_NAME,
19211 + .fmt_version = 0x1,
19212 + .fmt_arg_size = sizeof(struct pfm_pebs_p4_smpl_arg),
19213 + .fmt_validate = pfm_pebs_p4_fmt_validate,
19214 + .fmt_getsize = pfm_pebs_p4_fmt_get_size,
19215 + .fmt_init = pfm_pebs_p4_fmt_init,
19216 + .fmt_handler = pfm_pebs_p4_fmt_handler,
19217 + .fmt_restart = pfm_pebs_p4_fmt_restart,
19218 + .fmt_exit = pfm_pebs_p4_fmt_exit,
19219 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
19220 + .owner = THIS_MODULE,
19221 +};
19222 +
19223 +static int __init pfm_pebs_p4_fmt_init_module(void)
19224 +{
19225 + int ht_enabled;
19226 +
19227 + if (!cpu_has_pebs) {
19228 + PFM_INFO("processor does not have PEBS support");
19229 + return -1;
19230 + }
19231 + if (current_cpu_data.x86 != 15) {
19232 + PFM_INFO("not an Intel Pentium 4");
19233 + return -1;
19234 + }
19235 +#ifdef CONFIG_SMP
19236 + ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map))
19237 + / current_cpu_data.x86_max_cores) > 1;
19238 +#else
19239 + ht_enabled = 0;
19240 +#endif
19241 + if (ht_enabled) {
19242 + PFM_INFO("PEBS not available because HyperThreading is on");
19243 + return -1;
19244 + }
19245 + return pfm_fmt_register(&pebs_p4_fmt);
19246 +}
19247 +
19248 +static void __exit pfm_pebs_p4_fmt_cleanup_module(void)
19249 +{
19250 + pfm_fmt_unregister(&pebs_p4_fmt);
19251 +}
19252 +
19253 +module_init(pfm_pebs_p4_fmt_init_module);
19254 +module_exit(pfm_pebs_p4_fmt_cleanup_module);
19255 diff --git a/include/asm-mips/Kbuild b/include/asm-mips/Kbuild
19256 index 7897f05..7ed16fc 100644
19257 --- a/include/asm-mips/Kbuild
19258 +++ b/include/asm-mips/Kbuild
19259 @@ -1,3 +1,4 @@
19260 include include/asm-generic/Kbuild.asm
19261
19262 header-y += cachectl.h sgidefs.h sysmips.h
19263 +header-y += perfmon.h
19264 diff --git a/include/asm-mips/perfmon.h b/include/asm-mips/perfmon.h
19265 new file mode 100644
19266 index 0000000..7915c17
19267 --- /dev/null
19268 +++ b/include/asm-mips/perfmon.h
19269 @@ -0,0 +1,34 @@
19270 +/*
19271 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
19272 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19273 + *
19274 + * This file contains mips64 specific definitions for the perfmon
19275 + * interface.
19276 + *
19277 + * This file MUST never be included directly. Use linux/perfmon.h.
19278 + *
19279 + * This program is free software; you can redistribute it and/or
19280 + * modify it under the terms of version 2 of the GNU General Public
19281 + * License as published by the Free Software Foundation.
19282 + *
19283 + * This program is distributed in the hope that it will be useful,
19284 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19285 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19286 + * General Public License for more details.
19287 + *
19288 + * You should have received a copy of the GNU General Public License
19289 + * along with this program; if not, write to the Free Software
19290 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19291 + * 02111-1307 USA
19292 + */
19293 +#ifndef _ASM_MIPS64_PERFMON_H_
19294 +#define _ASM_MIPS64_PERFMON_H_
19295 +
/*
 * arch-specific user visible interface definitions:
 * upper bounds on the number of PMC and PMD registers,
 * 256 hardware plus 64 software registers each.
 */

#define PFM_ARCH_MAX_PMCS	(256 + 64)
#define PFM_ARCH_MAX_PMDS	(256 + 64)
19302 +
19303 +#endif /* _ASM_MIPS64_PERFMON_H_ */
19304 diff --git a/include/asm-mips/perfmon_kern.h b/include/asm-mips/perfmon_kern.h
19305 new file mode 100644
19306 index 0000000..7d213df
19307 --- /dev/null
19308 +++ b/include/asm-mips/perfmon_kern.h
19309 @@ -0,0 +1,412 @@
19310 +/*
19311 + * Copyright (c) 2005 Philip Mucci.
19312 + *
19313 + * Based on other versions:
19314 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
19315 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19316 + *
19317 + * This file contains mips64 specific definitions for the perfmon
19318 + * interface.
19319 + *
19320 + * This program is free software; you can redistribute it and/or
19321 + * modify it under the terms of version 2 of the GNU General Public
19322 + * License as published by the Free Software Foundation.
19323 + *
19324 + * This program is distributed in the hope that it will be useful,
19325 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19326 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19327 + * General Public License for more details.
19328 + *
19329 + * You should have received a copy of the GNU General Public License
19330 + * along with this program; if not, write to the Free Software
19331 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19332 + * 02111-1307 USA
19333 + */
19334 +#ifndef _ASM_MIPS64_PERFMON_KERN_H_
19335 +#define _ASM_MIPS64_PERFMON_KERN_H_
19336 +
19337 +#ifdef __KERNEL__
19338 +
19339 +#ifdef CONFIG_PERFMON
19340 +#include <linux/unistd.h>
19341 +#include <asm/cacheflush.h>
19342 +
/*
 * presumably the number of PMD/PMC syscall arguments that may be
 * copied onto the kernel stack instead of an allocated buffer
 * -- TODO confirm against the generic perfmon code
 */
#define PFM_ARCH_PMD_STK_ARG	2
#define PFM_ARCH_PMC_STK_ARG	2
19345 +
19346 +struct pfm_arch_pmu_info {
19347 + u32 pmu_style;
19348 +};
19349 +
/*
 * Bit masks for the performance control (PMC) and count (PMD) registers.
 *
 * NOTE(review): the register layout comment that follows places the
 * Event field at bits 8:5, whereas MIPS64_PMC_EVT_MASK covers bits 8:6
 * -- confirm which one is intended.
 */
#define MIPS64_CONFIG_PMC_MASK		(1 << 4)	/* bit 4 */
#define MIPS64_PMC_INT_ENABLE_MASK	(1 << 4)	/* bit 4 */
#define MIPS64_PMC_CNT_ENABLE_MASK	(0xf)		/* bits 3:0 */
#define MIPS64_PMC_EVT_MASK		(0x7 << 6)	/* bits 8:6 */
#define MIPS64_PMC_CTR_MASK		(1 << 31)	/* bit 31 */
#define MIPS64_PMD_INTERRUPT		(1 << 31)	/* bit 31 */
19356 +
19357 +/* Coprocessor register 25 contains the PMU interface. */
19358 +/* Sel 0 is control for counter 0 */
19359 +/* Sel 1 is count for counter 0. */
19360 +/* Sel 2 is control for counter 1. */
19361 +/* Sel 3 is count for counter 1. */
19362 +
19363 +/*
19364 +
19365 +31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
19366 +M 0--------------------------------------------------------------0 Event-- IE U S K EXL
19367 +
19368 +M 31 If this bit is one, another pair of Performance Control
19369 +and Counter registers is implemented at a MTC0
19370 +
19371 +Event 8:5 Counter event enabled for this counter. Possible events
19372 +are listed in Table 6-30. R/W Undefined
19373 +
19374 +IE 4 Counter Interrupt Enable. This bit masks bit 31 of the
19375 +associated count register from the interrupt exception
19376 +request output. R/W 0
19377 +
19378 +U 3 Count in User Mode. When this bit is set, the specified
19379 +event is counted in User Mode. R/W Undefined
19380 +
19381 +S 2 Count in Supervisor Mode. When this bit is set, the
19382 +specified event is counted in Supervisor Mode. R/W Undefined
19383 +
19384 +K 1 Count in Kernel Mode. When this bit is set, count the
19385 +event in Kernel Mode when EXL and ERL both are 0. R/W Undefined
19386 +
19387 +EXL 0 Count when EXL. When this bit is set, count the event
19388 +when EXL = 1 and ERL = 0. R/W Undefined
19389 +*/
19390 +
/* nothing to do on MIPS: empty implementation */
static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
{
}
19393 +
/* nothing to do on MIPS: empty implementation */
static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
						 struct pfm_event_set *set)
{
}
19397 +
/* nothing to do on MIPS: empty implementation */
static inline void pfm_arch_serialize(void)
{
}
19400 +
19401 +
/*
 * pfm_arch_intr_freeze_pmu() does not save the PMDs on MIPS, so the
 * save has to happen here when sets are switched on overflow. The work
 * is delegated to pfm_save_pmds().
 */
static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
						 struct pfm_event_set *set)
{
	pfm_save_pmds(ctx, set);
}
19411 +
19412 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
19413 + unsigned int cnum, u64 value)
19414 +{
19415 + /*
19416 + * we only write to the actual register when monitoring is
19417 + * active (pfm_start was issued)
19418 + */
19419 + if (ctx && (ctx->flags.started == 0))
19420 + return;
19421 +
19422 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
19423 + case 0:
19424 + write_c0_perfctrl0(value);
19425 + break;
19426 + case 1:
19427 + write_c0_perfctrl1(value);
19428 + break;
19429 + case 2:
19430 + write_c0_perfctrl2(value);
19431 + break;
19432 + case 3:
19433 + write_c0_perfctrl3(value);
19434 + break;
19435 + default:
19436 + BUG();
19437 + }
19438 +}
19439 +
19440 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
19441 + unsigned int cnum, u64 value)
19442 +{
19443 + value &= pfm_pmu_conf->ovfl_mask;
19444 +
19445 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
19446 + case 0:
19447 + write_c0_perfcntr0(value);
19448 + break;
19449 + case 1:
19450 + write_c0_perfcntr1(value);
19451 + break;
19452 + case 2:
19453 + write_c0_perfcntr2(value);
19454 + break;
19455 + case 3:
19456 + write_c0_perfcntr3(value);
19457 + break;
19458 + default:
19459 + BUG();
19460 + }
19461 +}
19462 +
19463 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
19464 +{
19465 + switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
19466 + case 0:
19467 + return read_c0_perfcntr0();
19468 + break;
19469 + case 1:
19470 + return read_c0_perfcntr1();
19471 + break;
19472 + case 2:
19473 + return read_c0_perfcntr2();
19474 + break;
19475 + case 3:
19476 + return read_c0_perfcntr3();
19477 + break;
19478 + default:
19479 + BUG();
19480 + return 0;
19481 + }
19482 +}
19483 +
19484 +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
19485 +{
19486 + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
19487 + case 0:
19488 + return read_c0_perfctrl0();
19489 + break;
19490 + case 1:
19491 + return read_c0_perfctrl1();
19492 + break;
19493 + case 2:
19494 + return read_c0_perfctrl2();
19495 + break;
19496 + case 3:
19497 + return read_c0_perfctrl3();
19498 + break;
19499 + default:
19500 + BUG();
19501 + return 0;
19502 + }
19503 +}
19504 +
/*
 * On some CPUs the upper bits of a counter must be set for the overflow
 * interrupt to fire; after the counter wraps they are cleared and this
 * hook may be used to set them back.
 *
 * On MIPS the counter is simply read and written back:
 * pfm_arch_write_pmd() applies pfm_pmu_conf->ovfl_mask, which masks
 * out overflow bit 31.
 */
static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
					   unsigned int cnum)
{
	pfm_arch_write_pmd(ctx, cnum, pfm_arch_read_pmd(ctx, cnum));
}
19518 +
19519 +/*
19520 + * At certain points, perfmon needs to know if monitoring has been
19521 + * explicitely started/stopped by user via pfm_start/pfm_stop. The
19522 + * information is tracked in ctx.flags.started. However on certain
19523 + * architectures, it may be possible to start/stop directly from
19524 + * user level with a single assembly instruction bypassing
19525 + * the kernel. This function must be used to determine by
19526 + * an arch-specific mean if monitoring is actually started/stopped.
19527 + */
19528 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
19529 +{
19530 + return ctx->flags.started;
19531 +}
19532 +
/* context switch out, system-wide variant: empty on MIPS */
static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
					 struct pfm_context *ctx)
{
}
19536 +
/* context switch in, system-wide variant: empty on MIPS */
static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
					struct pfm_context *ctx)
{
}
19540 +
/* context switch in, per-thread variant: empty on MIPS */
static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
					   struct pfm_context *ctx)
{
}
/*
 * arch hooks that are only declared in this header; their
 * definitions live outside of it
 */

/* context switch and state query */
int pfm_arch_ctxswout_thread(struct task_struct *task,
			     struct pfm_context *ctx);
int pfm_arch_is_monitoring_active(struct pfm_context *ctx);

/* start/stop of monitoring */
void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);

/* register restore */
void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);

char *pfm_arch_get_pmu_module_name(void);
19553 +
19554 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
19555 + struct pfm_event_set *set)
19556 +{
19557 + pfm_arch_stop(current, ctx);
19558 + /*
19559 + * we mark monitoring as stopped to avoid
19560 + * certain side effects especially in
19561 + * pfm_switch_sets_from_intr() on
19562 + * pfm_arch_restore_pmcs()
19563 + */
19564 + ctx->flags.started = 0;
19565 +}
19566 +
19567 +/*
19568 + * unfreeze PMU from pfm_do_interrupt_handler()
19569 + * ctx may be NULL for spurious
19570 + */
19571 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
19572 +{
19573 + if (!ctx)
19574 + return;
19575 +
19576 + PFM_DBG_ovfl("state=%d", ctx->state);
19577 +
19578 + ctx->flags.started = 1;
19579 +
19580 + if (ctx->state == PFM_CTX_MASKED)
19581 + return;
19582 +
19583 + pfm_arch_restore_pmcs(ctx, ctx->active_set);
19584 +}
19585 +
/*
 * Called from the PMU interrupt handler ONLY.
 *
 * On MIPS the PMU is frozen via arch_stop, and masking would be
 * implemented the same way. Since the PMU is already stopped by the
 * time the interrupt handler runs, there is no need to stop it a
 * second time: this function is a nop.
 */
static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
					    struct pfm_event_set *set)
{
}
19596 +
19597 +/*
19598 + * on MIPS masking/unmasking uses the start/stop mechanism, so we simply
19599 + * need to start here.
19600 + */
19601 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
19602 + struct pfm_event_set *set)
19603 +{
19604 + pfm_arch_start(current, ctx);
19605 +}
19606 +
19607 +static inline int pfm_arch_context_create(struct pfm_context *ctx,
19608 + u32 ctx_flags)
19609 +{
19610 + return 0;
19611 +}
19612 +
/* arch hook for context destruction: empty on MIPS */
static inline void pfm_arch_context_free(struct pfm_context *ctx)
{
}
19615 +
19616 +
19617 +
19618 +
19619 +
19620 +/*
19621 + * function called from pfm_setfl_sane(). Context is locked
19622 + * and interrupts are masked.
19623 + * The value of flags is the value of ctx_flags as passed by
19624 + * user.
19625 + *
19626 + * function must check arch-specific set flags.
19627 + * Return:
19628 + * 1 when flags are valid
19629 + * 0 on error
19630 + */
19631 +static inline int
19632 +pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
19633 +{
19634 + return 0;
19635 +}
19636 +
/* arch initialization hook: no MIPS-specific work, returns 0 */
static inline int pfm_arch_init(void)
{
	return 0;
}
19641 +
/* per-CPU arch initialization hook: empty on MIPS */
static inline void pfm_arch_init_percpu(void)
{
}
19644 +
/* arch hook for context load: no MIPS-specific work, returns 0 */
static inline int pfm_arch_load_context(struct pfm_context *ctx)
{
	return 0;
}
19649 +
/* arch hook for context unload: empty on MIPS */
static inline void pfm_arch_unload_context(struct pfm_context *ctx)
{
}
19652 +
19653 +static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
19654 +{
19655 + return 0;
19656 +}
19657 +
/* arch hook for PMU release: empty on MIPS */
static inline void pfm_arch_pmu_release(void)
{
}
19660 +
#ifdef CONFIG_PERFMON_FLUSH
/*
 * Because of the cache aliasing problem on MIPS, modified pages must be
 * flushed out of the cache.
 *
 * Every page touched by the byte range [addr, addr + len) is passed to
 * flush_data_cache_page(): the start is rounded down and the end rounded
 * up to a page boundary.
 */
static inline void pfm_cacheflush(void *addr, unsigned int len)
{
	unsigned long page = (unsigned long)addr & PAGE_MASK;
	unsigned long limit = ((unsigned long)addr + len + PAGE_SIZE - 1)
			      & PAGE_MASK;

	for (; page < limit; page += PAGE_SIZE)
		flush_data_cache_page(page);
}
#else
/* CONFIG_PERFMON_FLUSH is off: no flushing is performed */
static inline void pfm_cacheflush(void *addr, unsigned int len)
{
}
#endif
19682 +
/* empty on MIPS: task is not touched */
static inline void pfm_arch_arm_handle_work(struct task_struct *task)
{
}
19685 +
/* empty on MIPS: task is not touched */
static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
{
}
19688 +
/*
 * arch hook for PMU configuration setup: cfg is left untouched and
 * 0 is returned
 */
static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
{
	return 0;
}
19693 +
19694 +static inline int pfm_arch_get_base_syscall(void)
19695 +{
19696 + if (test_thread_flag(TIF_32BIT_ADDR)) {
19697 + if (test_thread_flag(TIF_32BIT_REGS))
19698 + return __NR_O32_Linux+330;
19699 + return __NR_N32_Linux+293;
19700 + }
19701 + return __NR_64_Linux+289;
19702 +}
19703 +
/*
 * MIPS keeps no arch-specific state per context: the structure has
 * no members
 */
struct pfm_arch_context {
	/* empty */
};

/* size of the arch-specific part of a context */
#define PFM_ARCH_CTX_SIZE	sizeof(struct pfm_arch_context)
/*
 * The sampling buffer may need extra alignment on MIPS: 0x4000 when
 * CONFIG_PERFMON_SMPL_ALIGN is set, 0 otherwise
 */
#ifndef CONFIG_PERFMON_SMPL_ALIGN
#define PFM_ARCH_SMPL_ALIGN_SIZE	0
#else
#define PFM_ARCH_SMPL_ALIGN_SIZE	0x4000
#endif
19717 +
19718 +#endif /* CONFIG_PERFMON */
19719 +
19720 +#endif /* __KERNEL__ */
19721 +#endif /* _ASM_MIPS64_PERFMON_KERN_H_ */
19722 diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
19723 index a944eda..470cdfc 100644
19724 --- a/include/asm-mips/system.h
19725 +++ b/include/asm-mips/system.h
19726 @@ -67,6 +67,10 @@ do { \
19727 __mips_mt_fpaff_switch_to(prev); \
19728 if (cpu_has_dsp) \
19729 __save_dsp(prev); \
19730 + if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
19731 + pfm_ctxsw_out(prev, next); \
19732 + if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
19733 + pfm_ctxsw_in(prev, next); \
19734 (last) = resume(prev, next, task_thread_info(next)); \
19735 } while (0)
19736
19737 diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
19738 index bb30606..34fd6aa 100644
19739 --- a/include/asm-mips/thread_info.h
19740 +++ b/include/asm-mips/thread_info.h
19741 @@ -114,6 +114,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
19742 #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
19743 #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
19744 #define TIF_SECCOMP 4 /* secure computing */
19745 +#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */
19746 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
19747 #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
19748 #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
19749 @@ -124,6 +125,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
19750 #define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */
19751 #define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */
19752 #define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */
19753 +#define TIF_PERFMON_CTXSW 25 /* perfmon needs ctxsw calls */
19754 #define TIF_SYSCALL_TRACE 31 /* syscall trace active */
19755
19756 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
19757 @@ -140,6 +142,8 @@ register struct thread_info *__current_thread_info __asm__("$28");
19758 #define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
19759 #define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
19760 #define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
19761 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
19762 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
19763
19764 /* work to do on interrupt/exception return */
19765 #define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP)
19766 diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h
19767 index a73e153..200f654 100644
19768 --- a/include/asm-mips/unistd.h
19769 +++ b/include/asm-mips/unistd.h
19770 @@ -350,11 +350,23 @@
19771 #define __NR_dup3 (__NR_Linux + 327)
19772 #define __NR_pipe2 (__NR_Linux + 328)
19773 #define __NR_inotify_init1 (__NR_Linux + 329)
19774 +#define __NR_pfm_create_context (__NR_Linux + 330)
19775 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
19776 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
19777 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
19778 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
19779 +#define __NR_pfm_start (__NR_pfm_create_context+5)
19780 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
19781 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
19782 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
19783 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
19784 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
19785 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
19786
19787 /*
19788 * Offset of the last Linux o32 flavoured syscall
19789 */
19790 -#define __NR_Linux_syscalls 329
19791 +#define __NR_Linux_syscalls 341
19792
19793 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
19794
19795 @@ -656,16 +668,28 @@
19796 #define __NR_dup3 (__NR_Linux + 286)
19797 #define __NR_pipe2 (__NR_Linux + 287)
19798 #define __NR_inotify_init1 (__NR_Linux + 288)
19799 +#define __NR_pfm_create_context (__NR_Linux + 289)
19800 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
19801 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
19802 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
19803 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
19804 +#define __NR_pfm_start (__NR_pfm_create_context+5)
19805 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
19806 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
19807 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
19808 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
19809 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
19810 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
19811
19812 /*
19813 * Offset of the last Linux 64-bit flavoured syscall
19814 */
19815 -#define __NR_Linux_syscalls 288
19816 +#define __NR_Linux_syscalls 300
19817
19818 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
19819
19820 #define __NR_64_Linux 5000
19821 -#define __NR_64_Linux_syscalls 288
19822 +#define __NR_64_Linux_syscalls 300
19823
19824 #if _MIPS_SIM == _MIPS_SIM_NABI32
19825
19826 @@ -966,16 +990,28 @@
19827 #define __NR_dup3 (__NR_Linux + 290)
19828 #define __NR_pipe2 (__NR_Linux + 291)
19829 #define __NR_inotify_init1 (__NR_Linux + 292)
19830 +#define __NR_pfm_create_context (__NR_Linux + 293)
19831 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
19832 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
19833 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
19834 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
19835 +#define __NR_pfm_start (__NR_pfm_create_context+5)
19836 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
19837 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
19838 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
19839 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
19840 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
19841 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
19842
19843 /*
19844 * Offset of the last N32 flavoured syscall
19845 */
19846 -#define __NR_Linux_syscalls 292
19847 +#define __NR_Linux_syscalls 304
19848
19849 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
19850
19851 #define __NR_N32_Linux 6000
19852 -#define __NR_N32_Linux_syscalls 292
19853 +#define __NR_N32_Linux_syscalls 304
19854
19855 #ifdef __KERNEL__
19856
19857 diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
19858 index 4a8e80c..d7d819e 100644
19859 --- a/include/asm-x86/Kbuild
19860 +++ b/include/asm-x86/Kbuild
19861 @@ -9,6 +9,7 @@ header-y += prctl.h
19862 header-y += ptrace-abi.h
19863 header-y += sigcontext32.h
19864 header-y += ucontext.h
19865 +header-y += perfmon.h
19866 header-y += processor-flags.h
19867
19868 unifdef-y += e820.h
19869 diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
19870 index 61cea9e..275e015 100644
19871 --- a/include/asm-x86/ia32_unistd.h
19872 +++ b/include/asm-x86/ia32_unistd.h
19873 @@ -8,11 +8,12 @@
19874 * the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
19875 */
19876
19877 -#define __NR_ia32_restart_syscall 0
19878 -#define __NR_ia32_exit 1
19879 -#define __NR_ia32_read 3
19880 -#define __NR_ia32_write 4
19881 -#define __NR_ia32_sigreturn 119
19882 -#define __NR_ia32_rt_sigreturn 173
19883 +#define __NR_ia32_restart_syscall 0
19884 +#define __NR_ia32_exit 1
19885 +#define __NR_ia32_read 3
19886 +#define __NR_ia32_write 4
19887 +#define __NR_ia32_sigreturn 119
19888 +#define __NR_ia32_rt_sigreturn 173
19889 +#define __NR_ia32_pfm_create_context 333
19890
19891 #endif /* _ASM_X86_64_IA32_UNISTD_H_ */
19892 diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
19893 index a48c7f2..892fe8f 100644
19894 --- a/include/asm-x86/irq_vectors.h
19895 +++ b/include/asm-x86/irq_vectors.h
19896 @@ -92,6 +92,11 @@
19897 #define LOCAL_TIMER_VECTOR 0xef
19898
19899 /*
19900 + * Perfmon PMU interrupt vector
19901 + */
19902 +#define LOCAL_PERFMON_VECTOR 0xee
19903 +
19904 +/*
19905 * First APIC vector available to drivers: (vectors 0x30-0xee) we
19906 * start at 0x31(0x41) to spread out vectors evenly between priority
19907 * levels. (0x80 is the syscall vector)
19908 diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
19909 index 9283b60..ac31c2d 100644
19910 --- a/include/asm-x86/mach-default/entry_arch.h
19911 +++ b/include/asm-x86/mach-default/entry_arch.h
19912 @@ -32,4 +32,8 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
19913 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
19914 #endif
19915
19916 +#ifdef CONFIG_PERFMON
19917 +BUILD_INTERRUPT(pmu_interrupt,LOCAL_PERFMON_VECTOR)
19918 +#endif
19919 +
19920 #endif
19921 diff --git a/include/asm-x86/perfmon.h b/include/asm-x86/perfmon.h
19922 new file mode 100644
19923 index 0000000..906f4b2
19924 --- /dev/null
19925 +++ b/include/asm-x86/perfmon.h
19926 @@ -0,0 +1,34 @@
19927 +/*
19928 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
19929 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19930 + *
19931 + * This file contains i386/x86_64 specific definitions for the perfmon
19932 + * interface.
19933 + *
19934 + * This file MUST never be included directly. Use linux/perfmon.h.
19935 + *
19936 + * This program is free software; you can redistribute it and/or
19937 + * modify it under the terms of version 2 of the GNU General Public
19938 + * License as published by the Free Software Foundation.
19939 + *
19940 + * This program is distributed in the hope that it will be useful,
19941 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19942 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19943 + * General Public License for more details.
19944 + *
19945 + * You should have received a copy of the GNU General Public License
19946 + * along with this program; if not, write to the Free Software
19947 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19948 + * 02111-1307 USA
19949 + */
19950 +#ifndef _ASM_X86_PERFMON__H_
19951 +#define _ASM_X86_PERFMON__H_
19952 +
/*
 * arch-specific user visible interface definitions:
 * upper bounds on the number of PMC and PMD registers,
 * 256 hardware plus 64 software registers each.
 */

#define PFM_ARCH_MAX_PMCS	(256 + 64)
#define PFM_ARCH_MAX_PMDS	(256 + 64)
19959 +
19960 +#endif /* _ASM_X86_PERFMON__H_ */
19961 diff --git a/include/asm-x86/perfmon_kern.h b/include/asm-x86/perfmon_kern.h
19962 new file mode 100644
19963 index 0000000..0e5d3a5
19964 --- /dev/null
19965 +++ b/include/asm-x86/perfmon_kern.h
19966 @@ -0,0 +1,548 @@
19967 +/*
19968 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
19969 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
19970 + *
19971 + * Copyright (c) 2007 Advanced Micro Devices, Inc.
19972 + * Contributed by Robert Richter <robert.richter@amd.com>
19973 + *
19974 + * This file contains X86 Processor Family specific definitions
19975 + * for the perfmon interface. This covers P6, Pentium M, P4/Xeon
19976 + * (32-bit and 64-bit, i.e., EM64T) and AMD X86-64.
19977 + *
19978 + * This program is free software; you can redistribute it and/or
19979 + * modify it under the terms of version 2 of the GNU General Public
19980 + * License as published by the Free Software Foundation.
19981 + *
19982 + * This program is distributed in the hope that it will be useful,
19983 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
19984 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19985 + * General Public License for more details.
19986 + *
19987 + * You should have received a copy of the GNU General Public License
19988 + * along with this program; if not, write to the Free Software
19989 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19990 + * 02111-1307 USA
19991 + */
19992 +#ifndef _ASM_X86_PERFMON_KERN_H_
19993 +#define _ASM_X86_PERFMON_KERN_H_
19994 +
19995 +#ifdef CONFIG_PERFMON
19996 +#include <linux/unistd.h>
/*
 * The values are smaller when the kernel is built with 4K stacks
 * (CONFIG_4KSTACKS).
 */
#ifndef CONFIG_4KSTACKS
#define PFM_ARCH_PMD_STK_ARG	4 /* about 700 bytes of stack space */
#define PFM_ARCH_PMC_STK_ARG	4 /* about 200 bytes of stack space */
#else
#define PFM_ARCH_PMD_STK_ARG	2
#define PFM_ARCH_PMC_STK_ARG	2
#endif
20004 +
20005 +struct pfm_arch_pmu_info {
20006 + u32 flags; /* PMU feature flags */
20007 + /*
20008 + * mandatory model-specific callbacks
20009 + */
20010 + int (*stop_save)(struct pfm_context *ctx, struct pfm_event_set *set);
20011 + int (*has_ovfls)(struct pfm_context *ctx);
20012 + void (*quiesce)(void);
20013 +
20014 + /*
20015 + * optional model-specific callbacks
20016 + */
20017 + void (*acquire_pmu_percpu)(void);
20018 + void (*release_pmu_percpu)(void);
20019 + int (*create_context)(struct pfm_context *ctx, u32 ctx_flags);
20020 + void (*free_context)(struct pfm_context *ctx);
20021 + int (*load_context)(struct pfm_context *ctx);
20022 + void (*unload_context)(struct pfm_context *ctx);
20023 + void (*write_pmc)(struct pfm_context *ctx, unsigned int cnum, u64 value);
20024 + void (*write_pmd)(struct pfm_context *ctx, unsigned int cnum, u64 value);
20025 + u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum);
20026 + u64 (*read_pmc)(struct pfm_context *ctx, unsigned int cnum);
20027 + void (*nmi_copy_state)(struct pfm_context *ctx);
20028 + void (*restore_pmcs)(struct pfm_context *ctx,
20029 + struct pfm_event_set *set);
20030 + void (*restore_pmds)(struct pfm_context *ctx,
20031 + struct pfm_event_set *set);
20032 +};
20033 +
/*
 * PMU feature flags: one bit per feature
 */
#define PFM_X86_FL_USE_NMI	0x01	/* user asking for NMI */
#define PFM_X86_FL_NO_SHARING	0x02	/* no sharing with other subsystems */
#define PFM_X86_FL_SHARING	0x04	/* PMU is being shared */
20040 +
/*
 * X86 per-context flags, packed as bit-fields totalling 32 bits
 */
struct pfm_x86_ctx_flags {
	unsigned int insecure:1;	/* rdpmc per-thread self-monitoring */
	unsigned int use_pebs:1;	/* PEBS used */
	unsigned int use_ds:1;		/* DS used */
	unsigned int reserved:29;	/* for future use */
};
20047 +
20048 +struct pfm_arch_context {
20049 + u64 saved_real_iip; /* instr pointer of last NMI intr */
20050 + struct pfm_x86_ctx_flags flags; /* flags */
20051 + void *ds_area; /* address of DS area (to go away) */
20052 + void *data; /* model-specific data */
20053 +};
20054 +
20055 +/*
20056 + * functions implemented as inline on x86
20057 + */
20058 +
20059 +/**
20060 + * pfm_arch_write_pmc - write a single PMC register
20061 + * @ctx: context to work on
20062 + * @cnum: PMC index
20063 + * @value: PMC 64-bit value
20064 + *
20065 + * in certain situations, ctx may be NULL
20066 + */
20067 +static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
20068 + unsigned int cnum, u64 value)
20069 +{
20070 + struct pfm_arch_pmu_info *pmu_info;
20071 +
20072 + pmu_info = pfm_pmu_info();
20073 +
20074 + /*
20075 + * we only write to the actual register when monitoring is
20076 + * active (pfm_start was issued)
20077 + */
20078 + if (ctx && ctx->flags.started == 0)
20079 + return;
20080 +
20081 + /*
20082 + * model-specific override, if any
20083 + */
20084 + if (pmu_info->write_pmc) {
20085 + pmu_info->write_pmc(ctx, cnum, value);
20086 + return;
20087 + }
20088 +
20089 + PFM_DBG_ovfl("pfm_arch_write_pmc(0x%lx, 0x%Lx)",
20090 + pfm_pmu_conf->pmc_desc[cnum].hw_addr,
20091 + (unsigned long long) value);
20092 +
20093 + wrmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
20094 +}
20095 +
20096 +/**
20097 + * pfm_arch_write_pmd - write a single PMD register
20098 + * @ctx: context to work on
20099 + * @cnum: PMD index
20100 + * @value: PMD 64-bit value
20101 + */
20102 +static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
20103 + unsigned int cnum, u64 value)
20104 +{
20105 + struct pfm_arch_pmu_info *pmu_info;
20106 +
20107 + pmu_info = pfm_pmu_info();
20108 +
20109 + /*
20110 + * to make sure the counter overflows, we set the
20111 + * upper bits. we also clear any other unimplemented
20112 + * bits as this may cause crash on some processors.
20113 + */
20114 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
20115 + value = (value | ~pfm_pmu_conf->ovfl_mask)
20116 + & ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
20117 +
20118 + PFM_DBG_ovfl("pfm_arch_write_pmd(0x%lx, 0x%Lx)",
20119 + pfm_pmu_conf->pmd_desc[cnum].hw_addr,
20120 + (unsigned long long) value);
20121 +
20122 + /*
20123 + * model-specific override, if any
20124 + */
20125 + if (pmu_info->write_pmd) {
20126 + pmu_info->write_pmd(ctx, cnum, value);
20127 + return;
20128 + }
20129 +
20130 + wrmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
20131 +}
20132 +
20133 +/**
20134 + * pfm_arch_read_pmd - read a single PMD register
20135 + * @ctx: context to work on
20136 + * @cnum: PMD index
20137 + *
20138 + * return value is register 64-bit value
20139 + */
20140 +static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
20141 +{
20142 + struct pfm_arch_pmu_info *pmu_info;
20143 + u64 tmp;
20144 +
20145 + pmu_info = pfm_pmu_info();
20146 +
20147 + /*
20148 + * model-specific override, if any
20149 + */
20150 + if (pmu_info->read_pmd)
20151 + tmp = pmu_info->read_pmd(ctx, cnum);
20152 + else
20153 + rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp);
20154 +
20155 + PFM_DBG_ovfl("pfm_arch_read_pmd(0x%lx) = 0x%Lx",
20156 + pfm_pmu_conf->pmd_desc[cnum].hw_addr,
20157 + (unsigned long long) tmp);
20158 + return tmp;
20159 +}
20160 +
20161 +/**
20162 + * pfm_arch_read_pmc - read a single PMC register
20163 + * @ctx: context to work on
20164 + * @cnum: PMC index
20165 + *
20166 + * return value is register 64-bit value
20167 + */
20168 +static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
20169 +{
20170 + struct pfm_arch_pmu_info *pmu_info;
20171 + u64 tmp;
20172 +
20173 + pmu_info = pfm_pmu_info();
20174 +
20175 + /*
20176 + * model-specific override, if any
20177 + */
20178 + if (pmu_info->read_pmc)
20179 + tmp = pmu_info->read_pmc(ctx, cnum);
20180 + else
20181 + rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp);
20182 +
20183 + PFM_DBG_ovfl("pfm_arch_read_pmc(0x%lx) = 0x%016Lx",
20184 + pfm_pmu_conf->pmc_desc[cnum].hw_addr,
20185 + (unsigned long long) tmp);
20186 + return tmp;
20187 +}
20188 +
20189 +/**
20190 + * pfm_arch_is_active - return non-zero if monitoring has been started
20191 + * @ctx: context to check
20192 + *
20193 + * At certain points, perfmon needs to know if monitoring has been
20194 + * explicitly started.
20195 + *
20196 + * On x86, there is no other way but to use pfm_start/pfm_stop
20197 + * to activate monitoring, thus we can simply check flags.started
20198 + */
20199 +static inline int pfm_arch_is_active(struct pfm_context *ctx)
20200 +{
20201 + return ctx->flags.started;
20202 +}
20203 +
20204 +
20205 +/**
20206 + * pfm_arch_unload_context - detach context from thread or CPU
20207 + * @ctx: context to detach
20208 + *
20209 + * in system-wide ctx->task is NULL, otherwise it points to the
20210 + * attached thread
20211 + */
20212 +static inline void pfm_arch_unload_context(struct pfm_context *ctx)
20213 +{
20214 + struct pfm_arch_pmu_info *pmu_info;
20215 + struct pfm_arch_context *ctx_arch;
20216 +
20217 + ctx_arch = pfm_ctx_arch(ctx);
20218 + pmu_info = pfm_pmu_info();
20219 +
20220 + if (ctx_arch->flags.insecure) {
20221 + PFM_DBG("clear cr4.pce");
20222 + clear_in_cr4(X86_CR4_PCE);
20223 + }
20224 +
20225 + if (pmu_info->unload_context)
20226 + pmu_info->unload_context(ctx);
20227 +}
20228 +
20229 +/**
20230 + * pfm_arch_load_context - attach context to thread or CPU
20231 + * @ctx: context to attach
20232 + */
20233 +static inline int pfm_arch_load_context(struct pfm_context *ctx)
20234 +{
20235 + struct pfm_arch_pmu_info *pmu_info;
20236 + struct pfm_arch_context *ctx_arch;
20237 + int ret = 0;
20238 +
20239 + ctx_arch = pfm_ctx_arch(ctx);
20240 + pmu_info = pfm_pmu_info();
20241 +
20242 + /*
20243 + * RDPMC authorized in system-wide and
20244 + * per-thread self-monitoring.
20245 + *
20246 + * RDPMC only gives access to counts.
20247 + *
20248 + * The context-switch routine code does not restore
20249 + * all the PMD registers (optimization), thus there
20250 + * is a possible leak of counts there in per-thread
20251 + * mode.
20252 + */
20253 + if (ctx->task == current || ctx->flags.system) {
20254 + PFM_DBG("set cr4.pce");
20255 + set_in_cr4(X86_CR4_PCE);
20256 + ctx_arch->flags.insecure = 1;
20257 + }
20258 +
20259 + if (pmu_info->load_context)
20260 + ret = pmu_info->load_context(ctx);
20261 +
20262 + return ret;
20263 +}
20264 +
20265 +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
20266 +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
20267 +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
20268 +
20269 +/**
20270 + * pfm_arch_unmask_monitoring - unmask monitoring
20271 + * @ctx: context to unmask
20272 + * @set: current event set
20273 + *
20274 + * masking is slightly different from stopping in that, it does not undo
20275 + * the pfm_start() issued by user. This is used in conjunction with
20276 + * sampling. Masking means stop monitoring, but do not authorize user
20277 + * to issue pfm_start/stop during that time. Unmasking is achieved via
20278 + * pfm_restart() and may also depend on the sampling format used.
20279 + *
20280 + * on x86 masking/unmasking use the start/stop mechanism, except
20281 + * that flags.started is not modified.
20282 + */
20283 +static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
20284 + struct pfm_event_set *set)
20285 +{
20286 + pfm_arch_start(current, ctx);
20287 +}
20288 +
20289 +/**
20290 + * pfm_arch_intr_freeze_pmu - stop monitoring when handling PMU interrupt
20291 + * @ctx: current context
20292 + * @set: current event set
20293 + *
20294 + * called from __pfm_interrupt_handler().
20295 + * ctx is not NULL. ctx is locked. interrupts are masked
20296 + *
20297 + * The following actions must take place:
20298 + * - stop all monitoring to ensure handler has consistent view.
20299 + * - collect overflowed PMDs bitmask into povfls_pmds and
20300 + * npend_ovfls. If no interrupt detected then npend_ovfls
20301 + * must be set to zero.
20302 + */
20303 +static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
20304 + struct pfm_event_set *set)
20305 +{
20306 + /*
20307 + * on X86, freezing is equivalent to stopping
20308 + */
20309 + pfm_arch_stop(current, ctx);
20310 +
20311 + /*
20312 + * we mark monitoring as stopped to avoid
20313 + * certain side effects especially in
20314 + * pfm_switch_sets_from_intr() and
20315 + * pfm_arch_restore_pmcs()
20316 + */
20317 + ctx->flags.started = 0;
20318 +}
20319 +
20320 +/**
20321 + * pfm_arch_intr_unfreeze_pmu - conditionally reactivate monitoring
20322 + * @ctx: current context
20323 + *
20324 + * current context may be NULL when dealing with spurious interrupts
20325 + *
20326 + * Must re-activate monitoring if context is not MASKED.
20327 + * interrupts are masked.
20328 + */
20329 +static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
20330 +{
20331 + if (ctx == NULL)
20332 + return;
20333 +
20334 + PFM_DBG_ovfl("state=%d", ctx->state);
20335 +
20336 + /*
20337 + * restore flags.started which is cleared in
20338 + * pfm_arch_intr_freeze_pmu()
20339 + */
20340 + ctx->flags.started = 1;
20341 +
20342 + if (ctx->state == PFM_CTX_MASKED)
20343 + return;
20344 +
20345 + pfm_arch_restore_pmcs(ctx, ctx->active_set);
20346 +}
20347 +
20348 +/**
20349 + * pfm_arch_setfl_sane - check arch/model specific event set flags
20350 + * @ctx: context to work on
20351 + * @flags: event set flags as passed by user
20352 + *
20353 + * called from pfm_setfl_sane(). Context is locked. Interrupts are masked.
20354 + *
20355 + * Return:
20356 + * 0 when flags are valid
20357 + * 1 on error
20358 + */
20359 +static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
20360 +{
20361 + return 0;
20362 +}
20363 +
20364 +/**
20365 + * pfm_arch_ovfl_reset_pmd - reset pmd on overflow
20366 + * @ctx: current context
20367 + * @cnum: PMD index
20368 + *
20369 + * On some CPUs, the upper bits of a counter must be set in order for the
20370 + * overflow interrupt to happen. On overflow, the counter has wrapped around,
20371 + * and the upper bits are cleared. This function may be used to set them back.
20372 + *
20373 + * For x86, the current version loses whatever is remaining in the counter,
20374 + * which usually has a small count. In order not to lose this count,
20375 + * we do a read-modify-write to set the upper bits while preserving the
20376 + * low-order bits. This is slow but works.
20377 + */
20378 +static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum)
20379 +{
20380 + u64 val;
20381 + val = pfm_arch_read_pmd(ctx, cnum);
20382 + pfm_arch_write_pmd(ctx, cnum, val);
20383 +}
20384 +
20385 +/**
20386 + * pfm_arch_context_create - create context
20387 + * @ctx: newly created context
20388 + * @ctx_flags: context flags as passed by user
20389 + *
20390 + * called from __pfm_create_context()
20391 + */
20392 +static inline int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
20393 +{
20394 + struct pfm_arch_pmu_info *pmu_info;
20395 +
20396 + pmu_info = pfm_pmu_info();
20397 +
20398 + if (pmu_info->create_context)
20399 + return pmu_info->create_context(ctx, ctx_flags);
20400 +
20401 + return 0;
20402 +}
20403 +
20404 +/**
20405 + * pfm_arch_context_free - free context
20406 + * @ctx: context to free
20407 + */
20408 +static inline void pfm_arch_context_free(struct pfm_context *ctx)
20409 +{
20410 + struct pfm_arch_pmu_info *pmu_info;
20411 +
20412 + pmu_info = pfm_pmu_info();
20413 +
20414 + if (pmu_info->free_context)
20415 + pmu_info->free_context(ctx);
20416 +}
20417 +
20418 +/*
20419 + * pfm_arch_clear_pmd_ovfl_cond - alter the pmds in such a way that they
20420 + * will not cause interrupts when unused.
20421 + *
20422 + * This is a nop on x86
20423 + */
20424 +static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
20425 + struct pfm_event_set *set)
20426 +{}
20427 +
20428 +/*
20429 + * functions implemented in arch/x86/perfmon/perfmon.c
20430 + */
20431 +int pfm_arch_init(void);
20432 +void pfm_arch_resend_irq(struct pfm_context *ctx);
20433 +
20434 +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
20435 +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
20436 +
20437 +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
20438 +int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg);
20439 +void pfm_arch_pmu_config_remove(void);
20440 +char *pfm_arch_get_pmu_module_name(void);
20441 +int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds);
20442 +void pfm_arch_pmu_release(void);
20443 +
20444 +/*
20445 + * pfm_arch_serialize - make PMU modifications visible to subsequent instructions
20446 + *
20447 + * This is a nop on x86
20448 + */
20449 +static inline void pfm_arch_serialize(void)
20450 +{}
20451 +
20452 +/*
20453 + * on x86, the PMDs are already saved by pfm_arch_intr_freeze_pmu()
20454 + * when entering the PMU interrupt handler, thus, we do not need
20455 + * to save them again in pfm_switch_sets_from_intr()
20456 + */
20457 +static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
20458 + struct pfm_event_set *set)
20459 +{}
20460 +
20461 +
20462 +static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
20463 + struct pfm_context *ctx)
20464 +{}
20465 +
20466 +static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
20467 + struct pfm_context *ctx)
20468 +{}
20469 +
20470 +static inline void pfm_arch_init_percpu(void)
20471 +{}
20472 +
20473 +static inline void pfm_cacheflush(void *addr, unsigned int len)
20474 +{}
20475 +
20476 +/*
20477 + * this function is called from the PMU interrupt handler ONLY.
20478 + * On x86, the PMU is frozen via arch_stop, masking would be implemented
20479 + * via arch-stop as well. Given that the PMU is already stopped when
20480 + * entering the interrupt handler, we do not need to stop it again, so
20481 + * this function is a nop.
20482 + */
20483 +static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
20484 + struct pfm_event_set *set)
20485 +{}
20486 +
20487 +
20488 +static inline void pfm_arch_arm_handle_work(struct task_struct *task)
20489 +{}
20490 +
20491 +static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
20492 +{}
20493 +
20494 +static inline int pfm_arch_get_base_syscall(void)
20495 +{
20496 +#ifdef __x86_64__
20497 + /* 32-bit syscall definition coming from ia32_unistd.h */
20498 + if (test_thread_flag(TIF_IA32))
20499 + return __NR_ia32_pfm_create_context;
20500 +#endif
20501 + return __NR_pfm_create_context;
20502 +}
20503 +
20504 +#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
20505 +/*
20506 + * x86 does not need extra alignment requirements for the sampling buffer
20507 + */
20508 +#define PFM_ARCH_SMPL_ALIGN_SIZE 0
20509 +
20510 +asmlinkage void pmu_interrupt(void);
20511 +
20512 +#endif /* CONFIG_PERFMON */
20513 +
20514 +#endif /* _ASM_X86_PERFMON_KERN_H_ */
20515 diff --git a/include/asm-x86/perfmon_pebs_core_smpl.h b/include/asm-x86/perfmon_pebs_core_smpl.h
20516 new file mode 100644
20517 index 0000000..4a12e0d
20518 --- /dev/null
20519 +++ b/include/asm-x86/perfmon_pebs_core_smpl.h
20520 @@ -0,0 +1,164 @@
20521 +/*
20522 + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
20523 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
20524 + *
20525 + * This program is free software; you can redistribute it and/or
20526 + * modify it under the terms of version 2 of the GNU General Public
20527 + * License as published by the Free Software Foundation.
20528 + *
20529 + * This program is distributed in the hope that it will be useful,
20530 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20531 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20532 + * General Public License for more details.
20533 + *
20534 + * You should have received a copy of the GNU General Public License
20535 + * along with this program; if not, write to the Free Software
20536 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20537 + * 02111-1307 USA
20538 + *
20539 + * This file implements the sampling format to support Intel
20540 + * Precise Event Based Sampling (PEBS) feature of Intel Core
20541 + * processors, such as Intel Core 2.
20542 + *
20543 + * What is PEBS?
20544 + * ------------
20545 + * This is a hardware feature to enhance sampling by providing
20546 + * better precision as to where a sample is taken. This avoids the
20547 + * typical skew in the instruction one can observe with any
20548 + * interrupt-based sampling technique.
20549 + *
20550 + * PEBS also lowers sampling overhead significantly by having the
20551 + * processor store samples instead of the OS. PMU interrupts are only
20552 + * generated after multiple samples are written.
20553 + *
20554 + * Another benefit of PEBS is that samples can be captured inside
20555 + * critical sections where interrupts are masked.
20556 + *
20557 + * How does it work?
20558 + * PEBS effectively implements a Hw buffer. The OS must pass a region
20559 + * of memory where samples are to be stored. The region can have any
20560 + * size. The OS must also specify the sampling period to reload. The PMU
20561 + * will interrupt when it reaches the end of the buffer or a specified
20562 + * threshold location inside the memory region.
20563 + *
20564 + * The description of the buffer is stored in the Data Save Area (DS).
20565 + * The samples are stored sequentially in the buffer. The format of the
20566 + * buffer is fixed and specified in the PEBS documentation. The sample
20567 + * format does not change between 32-bit and 64-bit modes unlike on the
20568 + * Pentium 4 version of PEBS.
20569 + *
20570 + * PEBS does not work when HyperThreading is enabled due to certain MSRs
20571 + * being shared between the two threads.
20572 + *
20573 + * What does the format do?
20574 + * It provides access to the PEBS feature for both 32-bit and 64-bit
20575 + * processors that support it.
20576 + *
20577 + * The same code and data structures are used for both 32-bit and 64-bit
20578 + * modes. A single format name is used for both modes. In 32-bit mode,
20579 + * some of the extended registers are written to zero in each sample.
20580 + *
20581 + * It is important to realize that the format provides a zero-copy
20582 + * environment for the samples, i.e., the OS never touches the
20583 + * samples. Whatever the processor writes is directly accessible to
20584 + * the user.
20585 + *
20586 + * Parameters to the buffer can be passed via pfm_create_context() in
20587 + * the pfm_pebs_core_smpl_arg structure.
20588 + */
20589 +#ifndef __PERFMON_PEBS_CORE_SMPL_H__
20590 +#define __PERFMON_PEBS_CORE_SMPL_H__ 1
20591 +
20592 +/*
20593 + * The 32-bit and 64-bit formats are identical, thus we use only
20594 + * one name for the format.
20595 + */
20596 +#define PFM_PEBS_CORE_SMPL_NAME "pebs_core"
20597 +
20598 +/*
20599 + * format specific parameters (passed at context creation)
20600 + *
20601 + * intr_thres: index from start of buffer of entry where the
20602 + * PMU interrupt must be triggered. It must be several samples
20603 + * short of the end of the buffer.
20604 + */
20605 +struct pfm_pebs_core_smpl_arg {
20606 + u64 cnt_reset; /* counter reset value */
20607 + size_t buf_size; /* size of the PEBS buffer in bytes */
20608 + size_t intr_thres;/* index of PEBS interrupt threshold entry */
20609 + u64 reserved[6]; /* for future use */
20610 +};
20611 +
20612 +/*
20613 + * Data Save Area (32 and 64-bit mode)
20614 + *
20615 + * The DS area is exposed to the user. To determine the number
20616 + * of samples available in PEBS, it is necessary to subtract
20617 + * pebs_buf_base from pebs_index.
20618 + *
20619 + * Layout of the structure is mandated by hardware and specified
20620 + * in the Intel documentation.
20621 + */
20622 +struct pfm_ds_area_core {
20623 + u64 bts_buf_base;
20624 + u64 bts_index;
20625 + u64 bts_abs_max;
20626 + u64 bts_intr_thres;
20627 + u64 pebs_buf_base;
20628 + u64 pebs_index;
20629 + u64 pebs_abs_max;
20630 + u64 pebs_intr_thres;
20631 + u64 pebs_cnt_reset;
20632 +};
20633 +
20634 +/*
20635 + * This header is at the beginning of the sampling buffer returned to the user.
20636 + *
20637 + * Because of PEBS alignment constraints, the actual PEBS buffer area does
20638 + * not necessarily begin right after the header. The start_offs field must be
20639 + * used to compute the first byte of the buffer. The offset is defined as
20640 + * the number of bytes between the end of the header and the beginning of
20641 + * the buffer. As such the formula is:
20642 + * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
20643 + */
20644 +struct pfm_pebs_core_smpl_hdr {
20645 + u64 overflows; /* #overflows for buffer */
20646 + size_t buf_size; /* bytes in the buffer */
20647 + size_t start_offs; /* actual buffer start offset */
20648 + u32 version; /* smpl format version */
20649 + u32 reserved1; /* for future use */
20650 + u64 reserved2[5]; /* for future use */
20651 + struct pfm_ds_area_core ds; /* data save area */
20652 +};
20653 +
20654 +/*
20655 + * Sample format as mandated by Intel documentation.
20656 + * The same format is used in both 32 and 64 bit modes.
20657 + */
20658 +struct pfm_pebs_core_smpl_entry {
20659 + u64 eflags;
20660 + u64 ip;
20661 + u64 eax;
20662 + u64 ebx;
20663 + u64 ecx;
20664 + u64 edx;
20665 + u64 esi;
20666 + u64 edi;
20667 + u64 ebp;
20668 + u64 esp;
20669 + u64 r8; /* 0 in 32-bit mode */
20670 + u64 r9; /* 0 in 32-bit mode */
20671 + u64 r10; /* 0 in 32-bit mode */
20672 + u64 r11; /* 0 in 32-bit mode */
20673 + u64 r12; /* 0 in 32-bit mode */
20674 + u64 r13; /* 0 in 32-bit mode */
20675 + u64 r14; /* 0 in 32-bit mode */
20676 + u64 r15; /* 0 in 32-bit mode */
20677 +};
20678 +
20679 +#define PFM_PEBS_CORE_SMPL_VERSION_MAJ 1U
20680 +#define PFM_PEBS_CORE_SMPL_VERSION_MIN 0U
20681 +#define PFM_PEBS_CORE_SMPL_VERSION (((PFM_PEBS_CORE_SMPL_VERSION_MAJ&0xffff)<<16)|\
20682 + (PFM_PEBS_CORE_SMPL_VERSION_MIN & 0xffff))
20683 +
20684 +#endif /* __PERFMON_PEBS_CORE_SMPL_H__ */
20685 diff --git a/include/asm-x86/perfmon_pebs_p4_smpl.h b/include/asm-x86/perfmon_pebs_p4_smpl.h
20686 new file mode 100644
20687 index 0000000..26b51b4
20688 --- /dev/null
20689 +++ b/include/asm-x86/perfmon_pebs_p4_smpl.h
20690 @@ -0,0 +1,193 @@
20691 +/*
20692 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
20693 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
20694 + *
20695 + * This program is free software; you can redistribute it and/or
20696 + * modify it under the terms of version 2 of the GNU General Public
20697 + * License as published by the Free Software Foundation.
20698 + *
20699 + * This program is distributed in the hope that it will be useful,
20700 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
20701 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20702 + * General Public License for more details.
20703 + *
20704 + * You should have received a copy of the GNU General Public License
20705 + * along with this program; if not, write to the Free Software
20706 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20707 + * 02111-1307 USA
20708 + *
20709 + * This file implements the sampling format to support Intel
20710 + * Precise Event Based Sampling (PEBS) feature of Pentium 4
20711 + * and other Netburst-based processors. Not to be used for
20712 + * Intel Core-based processors.
20713 + *
20714 + * What is PEBS?
20715 + * ------------
20716 + * This is a hardware feature to enhance sampling by providing
20717 + * better precision as to where a sample is taken. This avoids the
20718 + * typical skew in the instruction one can observe with any
20719 + * interrupt-based sampling technique.
20720 + *
20721 + * PEBS also lowers sampling overhead significantly by having the
20722 + * processor store samples instead of the OS. PMU interrupts are only
20723 + * generated after multiple samples are written.
20724 + *
20725 + * Another benefit of PEBS is that samples can be captured inside
20726 + * critical sections where interrupts are masked.
20727 + *
20728 + * How does it work?
20729 + * PEBS effectively implements a Hw buffer. The OS must pass a region
20730 + * of memory where samples are to be stored. The region can have any
20731 + * size. The OS must also specify the sampling period to reload. The PMU
20732 + * will interrupt when it reaches the end of the buffer or a specified
20733 + * threshold location inside the memory region.
20734 + *
20735 + * The description of the buffer is stored in the Data Save Area (DS).
20736 + * The samples are stored sequentially in the buffer. The format of the
20737 + * buffer is fixed and specified in the PEBS documentation. The sample
20738 + * format changes between 32-bit and 64-bit modes due to extended register
20739 + * file.
20740 + *
20741 + * PEBS does not work when HyperThreading is enabled due to certain MSRs
20742 + * being shared between the two threads.
20743 + *
20744 + * What does the format do?
20745 + * It provides access to the PEBS feature for both 32-bit and 64-bit
20746 + * processors that support it.
20747 + *
20748 + * The same code is used for both 32-bit and 64-bit modes, but different
20749 + * format names are used because the two modes are not compatible due to
20750 + * data model and register file differences. Similarly the public data
20751 + * structures describing the samples are different.
20752 + *
20753 + * It is important to realize that the format provides a zero-copy environment
20754 + * for the samples, i.e., the OS never touches the samples. Whatever the
20755 + * processor writes is directly accessible to the user.
20756 + *
20757 + * Parameters to the buffer can be passed via pfm_create_context() in
20758 + * the pfm_pebs_p4_smpl_arg structure.
20759 + *
20760 + * It is not possible to mix a 32-bit PEBS application on top of a 64-bit
20761 + * host kernel.
20762 + */
20763 +#ifndef __PERFMON_PEBS_P4_SMPL_H__
20764 +#define __PERFMON_PEBS_P4_SMPL_H__ 1
20765 +
20766 +#ifdef __i386__
20767 +/*
20768 + * The 32-bit and 64-bit formats are not compatible, thus we have
20769 + * two different identifications so that 32-bit programs running on
20770 + * 64-bit OS will fail to use the 64-bit PEBS support.
20771 + */
20772 +#define PFM_PEBS_P4_SMPL_NAME "pebs32_p4"
20773 +#else
20774 +#define PFM_PEBS_P4_SMPL_NAME "pebs64_p4"
20775 +#endif
20776 +
20777 +/*
20778 + * format specific parameters (passed at context creation)
20779 + *
20780 + * intr_thres: index from start of buffer of entry where the
20781 + * PMU interrupt must be triggered. It must be several samples
20782 + * short of the end of the buffer.
20783 + */
20784 +struct pfm_pebs_p4_smpl_arg {
20785 + u64 cnt_reset; /* counter reset value */
20786 + size_t buf_size; /* size of the PEBS buffer in bytes */
20787 + size_t intr_thres;/* index of PEBS interrupt threshold entry */
20788 + u64 reserved[6]; /* for future use */
20789 +};
20790 +
20791 +/*
20792 + * Data Save Area (32 and 64-bit mode)
20793 + *
20794 + * The DS area must be exposed to the user because this is the only
20795 + * way to report on the number of valid entries recorded by the CPU.
20796 + * This is required when the buffer is not full, i.e., there was no
20797 + * PMU interrupt.
20798 + *
20799 + * Layout of the structure is mandated by hardware and specified in
20800 + * the Intel documentation.
20801 + */
20802 +struct pfm_ds_area_p4 {
20803 + unsigned long bts_buf_base;
20804 + unsigned long bts_index;
20805 + unsigned long bts_abs_max;
20806 + unsigned long bts_intr_thres;
20807 + unsigned long pebs_buf_base;
20808 + unsigned long pebs_index;
20809 + unsigned long pebs_abs_max;
20810 + unsigned long pebs_intr_thres;
20811 + u64 pebs_cnt_reset;
20812 +};
20813 +
20814 +/*
20815 + * This header is at the beginning of the sampling buffer returned to the user.
20816 + *
20817 + * Because of PEBS alignment constraints, the actual PEBS buffer area does
20818 + * not necessarily begin right after the header. The start_offs field must be
20819 + * used to compute the first byte of the buffer. The offset is defined as
20820 + * the number of bytes between the end of the header and the beginning of
20821 + * the buffer. As such the formula is:
20822 + * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
20823 + */
20824 +struct pfm_pebs_p4_smpl_hdr {
20825 + u64 overflows; /* #overflows for buffer */
20826 + size_t buf_size; /* bytes in the buffer */
20827 + size_t start_offs; /* actual buffer start offset */
20828 + u32 version; /* smpl format version */
20829 + u32 reserved1; /* for future use */
20830 + u64 reserved2[5]; /* for future use */
20831 + struct pfm_ds_area_p4 ds; /* data save area */
20832 +};
20833 +
20834 +/*
20835 + * 64-bit PEBS record format is described in
20836 + * http://www.intel.com/technology/64bitextensions/30083502.pdf
20837 + *
20838 + * The format does not peek at samples. The sample structure is only
20839 + * used to ensure that the buffer is large enough to accommodate one
20840 + * sample.
20841 + */
20842 +#ifdef __i386__
20843 +struct pfm_pebs_p4_smpl_entry {
20844 + u32 eflags;
20845 + u32 ip;
20846 + u32 eax;
20847 + u32 ebx;
20848 + u32 ecx;
20849 + u32 edx;
20850 + u32 esi;
20851 + u32 edi;
20852 + u32 ebp;
20853 + u32 esp;
20854 +};
20855 +#else
20856 +struct pfm_pebs_p4_smpl_entry {
20857 + u64 eflags;
20858 + u64 ip;
20859 + u64 eax;
20860 + u64 ebx;
20861 + u64 ecx;
20862 + u64 edx;
20863 + u64 esi;
20864 + u64 edi;
20865 + u64 ebp;
20866 + u64 esp;
20867 + u64 r8;
20868 + u64 r9;
20869 + u64 r10;
20870 + u64 r11;
20871 + u64 r12;
20872 + u64 r13;
20873 + u64 r14;
20874 + u64 r15;
20875 +};
20876 +#endif
20877 +
20878 +#define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U
20879 +#define PFM_PEBS_P4_SMPL_VERSION_MIN 0U
20880 +#define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\
20881 + (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff))
20882 +
20883 +#endif /* __PERFMON_PEBS_P4_SMPL_H__ */
20884 diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
20885 index da0a675..b3a6ae9 100644
20886 --- a/include/asm-x86/thread_info.h
20887 +++ b/include/asm-x86/thread_info.h
20888 @@ -71,6 +71,7 @@ struct thread_info {
20889 * Warning: layout of LSW is hardcoded in entry.S
20890 */
20891 #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
20892 +#define TIF_PERFMON_WORK 1 /* work for pfm_handle_work() */
20893 #define TIF_SIGPENDING 2 /* signal pending */
20894 #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
20895 #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
20896 @@ -91,6 +92,7 @@ struct thread_info {
20897 #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
20898 #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
20899 #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
20900 +#define TIF_PERFMON_CTXSW 28 /* perfmon needs ctxsw calls */
20901
20902 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
20903 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
20904 @@ -112,6 +114,8 @@ struct thread_info {
20905 #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
20906 #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
20907 #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
20908 +#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
20909 +#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
20910
20911 /* work to do in syscall_trace_enter() */
20912 #define _TIF_WORK_SYSCALL_ENTRY \
20913 @@ -133,12 +137,12 @@ struct thread_info {
20914
20915 /* Only used for 64 bit */
20916 #define _TIF_DO_NOTIFY_MASK \
20917 - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
20918 + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERFMON_WORK)
20919
20920 /* flags to check in __switch_to() */
20921 #define _TIF_WORK_CTXSW \
20922 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \
20923 - _TIF_NOTSC)
20924 + _TIF_NOTSC|_TIF_PERFMON_CTXSW)
20925
20926 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
20927 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
20928 diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
20929 index d739467..5d8cca1 100644
20930 --- a/include/asm-x86/unistd_32.h
20931 +++ b/include/asm-x86/unistd_32.h
20932 @@ -338,9 +338,23 @@
20933 #define __NR_dup3 330
20934 #define __NR_pipe2 331
20935 #define __NR_inotify_init1 332
20936 +#define __NR_pfm_create_context 333
20937 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
20938 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
20939 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
20940 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
20941 +#define __NR_pfm_start (__NR_pfm_create_context+5)
20942 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
20943 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
20944 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
20945 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
20946 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
20947 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
20948
20949 #ifdef __KERNEL__
20950
20951 +#define NR_syscalls 345
20952 +
20953 #define __ARCH_WANT_IPC_PARSE_VERSION
20954 #define __ARCH_WANT_OLD_READDIR
20955 #define __ARCH_WANT_OLD_STAT
20956 diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
20957 index 3a341d7..75dac98 100644
20958 --- a/include/asm-x86/unistd_64.h
20959 +++ b/include/asm-x86/unistd_64.h
20960 @@ -653,7 +653,30 @@ __SYSCALL(__NR_dup3, sys_dup3)
20961 __SYSCALL(__NR_pipe2, sys_pipe2)
20962 #define __NR_inotify_init1 294
20963 __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
20964 -
20965 +#define __NR_pfm_create_context 295
20966 +__SYSCALL(__NR_pfm_create_context, sys_pfm_create_context)
20967 +#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
20968 +__SYSCALL(__NR_pfm_write_pmcs, sys_pfm_write_pmcs)
20969 +#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
20970 +__SYSCALL(__NR_pfm_write_pmds, sys_pfm_write_pmds)
20971 +#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
20972 + __SYSCALL(__NR_pfm_read_pmds, sys_pfm_read_pmds)
20973 +#define __NR_pfm_load_context (__NR_pfm_create_context+4)
20974 +__SYSCALL(__NR_pfm_load_context, sys_pfm_load_context)
20975 +#define __NR_pfm_start (__NR_pfm_create_context+5)
20976 +__SYSCALL(__NR_pfm_start, sys_pfm_start)
20977 +#define __NR_pfm_stop (__NR_pfm_create_context+6)
20978 +__SYSCALL(__NR_pfm_stop, sys_pfm_stop)
20979 +#define __NR_pfm_restart (__NR_pfm_create_context+7)
20980 +__SYSCALL(__NR_pfm_restart, sys_pfm_restart)
20981 +#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
20982 +__SYSCALL(__NR_pfm_create_evtsets, sys_pfm_create_evtsets)
20983 +#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
20984 +__SYSCALL(__NR_pfm_getinfo_evtsets, sys_pfm_getinfo_evtsets)
20985 +#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
20986 +__SYSCALL(__NR_pfm_delete_evtsets, sys_pfm_delete_evtsets)
20987 +#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
20988 +__SYSCALL(__NR_pfm_unload_context, sys_pfm_unload_context)
20989
20990 #ifndef __NO_STUBS
20991 #define __ARCH_WANT_OLD_READDIR
20992 diff --git a/include/linux/Kbuild b/include/linux/Kbuild
20993 index b68ec09..d37036a 100644
20994 --- a/include/linux/Kbuild
20995 +++ b/include/linux/Kbuild
20996 @@ -162,6 +162,8 @@ header-y += video_decoder.h
20997 header-y += video_encoder.h
20998 header-y += videotext.h
20999 header-y += x25.h
21000 +header-y += perfmon.h
21001 +header-y += perfmon_dfl_smpl.h
21002
21003 unifdef-y += acct.h
21004 unifdef-y += adb.h
21005 diff --git a/include/linux/perfmon.h b/include/linux/perfmon.h
21006 new file mode 100644
21007 index 0000000..5d9b977
21008 --- /dev/null
21009 +++ b/include/linux/perfmon.h
21010 @@ -0,0 +1,213 @@
21011 +/*
21012 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
21013 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21014 + *
21015 + * This program is free software; you can redistribute it and/or
21016 + * modify it under the terms of version 2 of the GNU General Public
21017 + * License as published by the Free Software Foundation.
21018 + *
21019 + * This program is distributed in the hope that it will be useful,
21020 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21021 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21022 + * General Public License for more details.
21023 + *
21024 + * You should have received a copy of the GNU General Public License
21025 + * along with this program; if not, write to the Free Software
21026 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21027 + * 02111-1307 USA
21028 + */
21029 +
21030 +#ifndef __LINUX_PERFMON_H__
21031 +#define __LINUX_PERFMON_H__
21032 +
21033 +/*
21034 + * This file contains all the user visible generic definitions for the
21035 + * interface. Model-specific user-visible definitions are located in
21036 + * the asm/perfmon.h file.
21037 + */
21038 +
21039 +/*
21040 + * include arch-specific user interface definitions
21041 + */
21042 +#include <asm/perfmon.h>
21043 +
21044 +/*
21045 + * defined by each arch
21046 + */
21047 +#define PFM_MAX_PMCS PFM_ARCH_MAX_PMCS
21048 +#define PFM_MAX_PMDS PFM_ARCH_MAX_PMDS
21049 +
21050 +/*
21051 + * number of elements for each type of bitvector
21052 + * all bitvectors use u64 fixed size type on all architectures.
21053 + */
21054 +#define PFM_BVSIZE(x) (((x)+(sizeof(__u64)<<3)-1) / (sizeof(__u64)<<3))
21055 +#define PFM_PMD_BV PFM_BVSIZE(PFM_MAX_PMDS)
21056 +#define PFM_PMC_BV PFM_BVSIZE(PFM_MAX_PMCS)
21057 +
21058 +/*
21059 + * register flags layout:
21060 + * bit[00-15] : generic flags
21061 + * bit[16-31] : arch-specific flags
21062 + *
21063 + * PFM_REGFL_NO_EMUL64: must be set on the PMC controlling the PMD
21064 + */
21065 +#define PFM_REGFL_OVFL_NOTIFY 0x1 /* PMD: send notification on event */
21066 +#define PFM_REGFL_RANDOM 0x2 /* PMD: randomize value after event */
21067 +#define PFM_REGFL_NO_EMUL64 0x4 /* PMC: no 64-bit emulation */
21068 +
21069 +/*
21070 + * event set flags layout:
21071 + * bits[00-15] : generic flags
21072 + * bits[16-31] : arch-specific flags (see asm/perfmon.h)
21073 + */
21074 +#define PFM_SETFL_OVFL_SWITCH 0x01 /* enable switch on overflow */
21075 +#define PFM_SETFL_TIME_SWITCH 0x02 /* enable switch on timeout */
21076 +
21077 +/*
21078 + * argument to pfm_create_context() system call
21079 + * structure shared with user level
21080 + */
21081 +struct pfarg_ctx {
21082 + __u32 ctx_flags; /* noblock/block/syswide */
21083 + __u32 ctx_reserved1; /* for future use */
21084 + __u64 ctx_reserved2[7]; /* for future use */
21085 +};
21086 +
21087 +/*
21088 + * context flags layout:
21089 + * bits[00-15]: generic flags
21090 + * bits[16-31]: arch-specific flags (see asm/perfmon.h)
21091 + */
21092 +#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user notifications */
21093 +#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
21094 +#define PFM_FL_OVFL_NO_MSG 0x80 /* no overflow msgs */
21095 +
21096 +/*
21097 + * argument to pfm_write_pmcs() system call.
21098 + * structure shared with user level
21099 + */
21100 +struct pfarg_pmc {
21101 + __u16 reg_num; /* which register */
21102 + __u16 reg_set; /* event set for this register */
21103 + __u32 reg_flags; /* REGFL flags */
21104 + __u64 reg_value; /* pmc value */
21105 + __u64 reg_reserved2[4]; /* for future use */
21106 +};
21107 +
21108 +/*
21109 + * argument to pfm_write_pmds() and pfm_read_pmds() system calls.
21110 + * structure shared with user level
21111 + */
21112 +struct pfarg_pmd {
21113 + __u16 reg_num; /* which register */
21114 + __u16 reg_set; /* event set for this register */
21115 + __u32 reg_flags; /* REGFL flags */
21116 + __u64 reg_value; /* initial pmc/pmd value */
21117 + __u64 reg_long_reset; /* value to reload after notification */
21118 + __u64 reg_short_reset; /* reset after counter overflow */
21119 + __u64 reg_last_reset_val; /* return: PMD last reset value */
21120 + __u64 reg_ovfl_switch_cnt; /* #overflows before switch */
21121 + __u64 reg_reset_pmds[PFM_PMD_BV]; /* reset on overflow */
21122 + __u64 reg_smpl_pmds[PFM_PMD_BV]; /* record in sample */
21123 + __u64 reg_smpl_eventid; /* opaque event identifier */
21124 + __u64 reg_random_mask; /* bitmask used to limit random value */
21125 + __u32 reg_random_seed; /* seed for randomization (OBSOLETE) */
21126 + __u32 reg_reserved2[7]; /* for future use */
21127 +};
21128 +
21129 +/*
21130 + * optional argument to pfm_start() system call. Pass NULL if not needed.
21131 + * structure shared with user level
21132 + */
21133 +struct pfarg_start {
21134 + __u16 start_set; /* event set to start with */
21135 + __u16 start_reserved1; /* for future use */
21136 + __u32 start_reserved2; /* for future use */
21137 + __u64 reserved3[3]; /* for future use */
21138 +};
21139 +
21140 +/*
21141 + * argument to pfm_load_context() system call.
21142 + * structure shared with user level
21143 + */
21144 +struct pfarg_load {
21145 + __u32 load_pid; /* thread or CPU to attach to */
21146 + __u16 load_set; /* set to load first */
21147 + __u16 load_reserved1; /* for future use */
21148 + __u64 load_reserved2[3]; /* for future use */
21149 +};
21150 +
21151 +/*
21152 + * argument to pfm_create_evtsets() and pfm_delete_evtsets() system calls.
21153 + * structure shared with user level.
21154 + */
21155 +struct pfarg_setdesc {
21156 + __u16 set_id; /* which set */
21157 + __u16 set_reserved1; /* for future use */
21158 + __u32 set_flags; /* SETFL flags */
21159 + __u64 set_timeout; /* switch timeout in nsecs */
21160 + __u64 reserved[6]; /* for future use */
21161 +};
21162 +
21163 +/*
21164 + * argument to pfm_getinfo_evtsets() system call.
21165 + * structure shared with user level
21166 + */
21167 +struct pfarg_setinfo {
21168 + __u16 set_id; /* which set */
21169 + __u16 set_reserved1; /* for future use */
21170 + __u32 set_flags; /* out: SETFL flags */
21171 + __u64 set_ovfl_pmds[PFM_PMD_BV]; /* out: last ovfl PMDs */
21172 + __u64 set_runs; /* out: #times the set was active */
21173 + __u64 set_timeout; /* out: eff/leftover timeout (nsecs) */
21174 + __u64 set_act_duration; /* out: time set was active in nsecs */
21175 + __u64 set_avail_pmcs[PFM_PMC_BV];/* out: available PMCs */
21176 + __u64 set_avail_pmds[PFM_PMD_BV];/* out: available PMDs */
21177 + __u64 set_reserved3[6]; /* for future use */
21178 +};
21179 +
21180 +/*
21181 + * default value for the user and group security parameters in
21182 + * /proc/sys/kernel/perfmon/sys_group
21183 + * /proc/sys/kernel/perfmon/task_group
21184 + */
21185 +#define PFM_GROUP_PERM_ANY (-1) /* any user/group; parenthesized so it is safe inside expressions */
21186 +
21187 +/*
21188 + * overflow notification message.
21189 + * structure shared with user level
21190 + */
21191 +struct pfarg_ovfl_msg {
21192 + __u32 msg_type; /* message type: PFM_MSG_OVFL */
21193 + __u32 msg_ovfl_pid; /* process id */
21194 + __u16 msg_active_set; /* active set at overflow */
21195 + __u16 msg_ovfl_cpu; /* cpu of PMU interrupt */
21196 + __u32 msg_ovfl_tid; /* thread id */
21197 + __u64 msg_ovfl_ip; /* IP on PMU intr */
21198 + __u64 msg_ovfl_pmds[PFM_PMD_BV];/* overflowed PMDs */
21199 +};
21200 +
21201 +#define PFM_MSG_OVFL 1 /* an overflow happened */
21202 +#define PFM_MSG_END 2 /* task to which context was attached ended */
21203 +
21204 +/*
21205 + * generic notification message (union).
21206 + * union shared with user level
21207 + */
21208 +union pfarg_msg {
21209 + __u32 type;
21210 + struct pfarg_ovfl_msg pfm_ovfl_msg;
21211 +};
21212 +
21213 +/*
21214 + * perfmon version number
21215 + */
21216 +#define PFM_VERSION_MAJ 2U
21217 +#define PFM_VERSION_MIN 82U
21218 +#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|\
21219 + (PFM_VERSION_MIN & 0xffff))
21220 +#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
21221 +#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
21222 +
21223 +#endif /* __LINUX_PERFMON_H__ */
21224 diff --git a/include/linux/perfmon_dfl_smpl.h b/include/linux/perfmon_dfl_smpl.h
21225 new file mode 100644
21226 index 0000000..e0817a8
21227 --- /dev/null
21228 +++ b/include/linux/perfmon_dfl_smpl.h
21229 @@ -0,0 +1,78 @@
21230 +/*
21231 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
21232 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21233 + *
21234 + * This file implements the new dfl sampling buffer format
21235 + * for perfmon2 subsystem.
21236 + *
21237 + * This program is free software; you can redistribute it and/or
21238 + * modify it under the terms of version 2 of the GNU General Public
21239 + * License as published by the Free Software Foundation.
21240 + *
21241 + * This program is distributed in the hope that it will be useful,
21242 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21243 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21244 + * General Public License for more details.
21245 + *
21246 + * You should have received a copy of the GNU General Public License
21247 + * along with this program; if not, write to the Free Software
21248 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21249 + * 02111-1307 USA
21250 + */
21251 +#ifndef __PERFMON_DFL_SMPL_H__
21252 +#define __PERFMON_DFL_SMPL_H__ 1
21253 +
21254 +/*
21255 + * format specific parameters (passed at context creation)
21256 + */
21257 +struct pfm_dfl_smpl_arg {
21258 + __u64 buf_size; /* size of the buffer in bytes */
21259 + __u32 buf_flags; /* buffer specific flags */
21260 + __u32 reserved1; /* for future use */
21261 + __u64 reserved[6]; /* for future use */
21262 +};
21263 +
21264 +/*
21265 + * This header is at the beginning of the sampling buffer returned to the user.
21266 + * It is directly followed by the first record.
21267 + */
21268 +struct pfm_dfl_smpl_hdr {
21269 + __u64 hdr_count; /* how many valid entries */
21270 + __u64 hdr_cur_offs; /* current offset from top of buffer */
21271 + __u64 hdr_overflows; /* #overflows for buffer */
21272 + __u64 hdr_buf_size; /* bytes in the buffer */
21273 + __u64 hdr_min_buf_space;/* minimal buffer size (internal use) */
21274 + __u32 hdr_version; /* smpl format version */
21275 + __u32 hdr_buf_flags; /* copy of buf_flags */
21276 + __u64 hdr_reserved[10]; /* for future use */
21277 +};
21278 +
21279 +/*
21280 + * Entry header in the sampling buffer. The header is directly followed
21281 + * with the values of the PMD registers of interest saved in increasing
21282 + * index order: PMD4, PMD5, and so on. How many PMDs are present depends
21283 + * on how the session was programmed.
21284 + *
21285 + * In the case where multiple counters overflow at the same time, multiple
21286 + * entries are written consecutively.
21287 + *
21288 + * last_reset_value member indicates the initial value of the overflowed PMD.
21289 + */
21290 +struct pfm_dfl_smpl_entry {
21291 + __u32 pid; /* thread id (for NPTL, this is gettid()) */
21292 + __u16 ovfl_pmd; /* index of overflowed PMD for this sample */
21293 + __u16 reserved; /* for future use */
21294 + __u64 last_reset_val; /* initial value of overflowed PMD */
21295 + __u64 ip; /* where the overflow interrupt happened */
21296 + __u64 tstamp; /* overflow timestamp */
21297 + __u16 cpu; /* cpu on which the overflow occurred */
21298 + __u16 set; /* event set active when overflow occurred */
21299 + __u32 tgid; /* thread group id (getpid() for NPTL) */
21300 +};
21301 +
21302 +#define PFM_DFL_SMPL_VERSION_MAJ 1U
21303 +#define PFM_DFL_SMPL_VERSION_MIN 0U
21304 +#define PFM_DFL_SMPL_VERSION (((PFM_DFL_SMPL_VERSION_MAJ&0xffff)<<16)|\
21305 + (PFM_DFL_SMPL_VERSION_MIN & 0xffff))
21306 +
21307 +#endif /* __PERFMON_DFL_SMPL_H__ */
21308 diff --git a/include/linux/perfmon_fmt.h b/include/linux/perfmon_fmt.h
21309 new file mode 100644
21310 index 0000000..82a6a90
21311 --- /dev/null
21312 +++ b/include/linux/perfmon_fmt.h
21313 @@ -0,0 +1,74 @@
21314 +/*
21315 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
21316 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21317 + *
21318 + * Interface for custom sampling buffer format modules
21319 + *
21320 + * This program is free software; you can redistribute it and/or
21321 + * modify it under the terms of version 2 of the GNU General Public
21322 + * License as published by the Free Software Foundation.
21323 + *
21324 + * This program is distributed in the hope that it will be useful,
21325 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21326 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21327 + * General Public License for more details.
21328 + *
21329 + * You should have received a copy of the GNU General Public License
21330 + * along with this program; if not, write to the Free Software
21331 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21332 + * 02111-1307 USA
21333 + */
21334 +#ifndef __PERFMON_FMT_H__
21335 +#define __PERFMON_FMT_H__ 1
21336 +
21337 +#include <linux/kobject.h>
21338 +
21339 +typedef int (*fmt_validate_t)(u32 flags, u16 npmds, void *arg);
21340 +typedef int (*fmt_getsize_t)(u32 flags, void *arg, size_t *size);
21341 +typedef int (*fmt_init_t)(struct pfm_context *ctx, void *buf, u32 flags,
21342 + u16 npmds, void *arg);
21343 +typedef int (*fmt_restart_t)(int is_active, u32 *ovfl_ctrl, void *buf);
21344 +typedef int (*fmt_exit_t)(void *buf);
21345 +typedef int (*fmt_handler_t)(struct pfm_context *ctx,
21346 + unsigned long ip, u64 stamp, void *data);
21347 +
21348 +struct pfm_smpl_fmt {
21349 + char *fmt_name; /* name of the format (required) */
21350 + size_t fmt_arg_size; /* size of fmt args for ctx create */
21351 + u32 fmt_flags; /* format specific flags */
21352 + u32 fmt_version; /* format version number */
21353 +
21354 + fmt_validate_t fmt_validate; /* validate context flags */
21355 + fmt_getsize_t fmt_getsize; /* get size for sampling buffer */
21356 + fmt_init_t fmt_init; /* initialize buffer area */
21357 + fmt_handler_t fmt_handler; /* overflow handler (required) */
21358 + fmt_restart_t fmt_restart; /* restart after notification */
21359 + fmt_exit_t fmt_exit; /* context termination */
21360 +
21361 + struct list_head fmt_list; /* internal use only */
21362 +
21363 + struct kobject kobj; /* sysfs internal use only */
21364 + struct module *owner; /* pointer to module owner */
21365 + u32 fmt_qdepth; /* Max notify queue depth (required) */
21366 +};
21367 +#define to_smpl_fmt(n) container_of(n, struct pfm_smpl_fmt, kobj)
21368 +
21369 +#define PFM_FMTFL_IS_BUILTIN 0x1 /* fmt is compiled in */
21370 +/*
21371 + * we need to know whether the format is builtin or compiled
21372 + * as a module
21373 + */
21374 +#ifdef MODULE
21375 +#define PFM_FMT_BUILTIN_FLAG 0 /* format built as a module */
21376 +#else
21377 +#define PFM_FMT_BUILTIN_FLAG PFM_FMTFL_IS_BUILTIN /* format compiled in */
21378 +#endif
21379 +
21380 +int pfm_fmt_register(struct pfm_smpl_fmt *fmt);
21381 +int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt);
21382 +void pfm_sysfs_builtin_fmt_add(void);
21383 +
21384 +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt);
21385 +void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt);
21386 +
21387 +#endif /* __PERFMON_FMT_H__ */
21388 diff --git a/include/linux/perfmon_kern.h b/include/linux/perfmon_kern.h
21389 new file mode 100644
21390 index 0000000..6c3b527
21391 --- /dev/null
21392 +++ b/include/linux/perfmon_kern.h
21393 @@ -0,0 +1,551 @@
21394 +/*
21395 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
21396 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21397 + *
21398 + * This program is free software; you can redistribute it and/or
21399 + * modify it under the terms of version 2 of the GNU General Public
21400 + * License as published by the Free Software Foundation.
21401 + *
21402 + * This program is distributed in the hope that it will be useful,
21403 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21404 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21405 + * General Public License for more details.
21406 + *
21407 + * You should have received a copy of the GNU General Public License
21408 + * along with this program; if not, write to the Free Software
21409 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21410 + * 02111-1307 USA
21411 + */
21412 +
21413 +#ifndef __LINUX_PERFMON_KERN_H__
21414 +#define __LINUX_PERFMON_KERN_H__
21415 +/*
21416 + * This file contains all the definitions of data structures, variables, macros
21417 + * that are to be shared between generic code and arch-specific code
21418 + *
21419 + * For generic only definitions, use perfmon/perfmon_priv.h
21420 + */
21421 +#ifdef CONFIG_PERFMON
21422 +
21423 +#include <linux/file.h>
21424 +#include <linux/sched.h>
21425 +#include <linux/perfmon.h>
21426 +
21427 +/*
21428 + * system administrator configuration controls available via
21429 + * the /sys/kernel/perfmon interface
21430 + */
21431 +struct pfm_controls {
21432 + u32 debug; /* debugging control bitmask */
21433 + gid_t sys_group; /* gid to create a syswide context */
21434 + gid_t task_group; /* gid to create a per-task context */
21435 + u32 flags; /* control flags (see below) */
21436 + size_t arg_mem_max; /* maximum vector argument size */
21437 + size_t smpl_buffer_mem_max; /* max buf mem, -1 for infinity */
21438 +};
21439 +extern struct pfm_controls pfm_controls;
21440 +
21441 +/*
21442 + * control flags
21443 + */
21444 +#define PFM_CTRL_FL_RW_EXPERT 0x1 /* bypass reserved fields on read/write */
21445 +
21446 +/*
21447 + * software PMD
21448 + */
21449 +struct pfm_pmd {
21450 + u64 value; /* 64-bit value */
21451 + u64 lval; /* last reset value */
21452 + u64 ovflsw_thres; /* #ovfls left before switch */
21453 + u64 long_reset; /* long reset value on overflow */
21454 + u64 short_reset; /* short reset value on overflow */
21455 + u64 reset_pmds[PFM_PMD_BV]; /* pmds to reset on overflow */
21456 + u64 smpl_pmds[PFM_PMD_BV]; /* pmds to record on overflow */
21457 + u64 mask; /* range mask for random value */
21458 + u64 ovflsw_ref_thres; /* #ovfls before next set */
21459 + u64 eventid; /* opaque event identifier */
21460 + u32 flags; /* notify/do not notify */
21461 +};
21462 +
21463 +/*
21464 + * event_set: encapsulates the full PMU state
21465 + */
21466 +struct pfm_event_set {
21467 + struct list_head list; /* ordered chain of sets */
21468 + u16 id; /* set identification */
21469 + u16 nused_pmds; /* max number of used PMDs */
21470 + u16 nused_pmcs; /* max number of used PMCs */
21471 + u16 pad1; /* padding */
21472 + u32 flags; /* public flags */
21473 + u32 priv_flags; /* private flags (see below) */
21474 + u64 runs; /* # of activations */
21475 + u32 npend_ovfls; /* number of pending PMD overflow */
21476 + u32 pad2; /* padding */
21477 + u64 used_pmds[PFM_PMD_BV]; /* used PMDs */
21478 + u64 povfl_pmds[PFM_PMD_BV]; /* pending overflowed PMDs */
21479 + u64 ovfl_pmds[PFM_PMD_BV]; /* last overflowed PMDs */
21480 + u64 reset_pmds[PFM_PMD_BV]; /* PMDs to reset after overflow */
21481 + u64 ovfl_notify[PFM_PMD_BV]; /* notify on overflow */
21482 + u64 used_pmcs[PFM_PMC_BV]; /* used PMCs */
21483 + u64 pmcs[PFM_MAX_PMCS]; /* PMC values */
21484 +
21485 + struct pfm_pmd pmds[PFM_MAX_PMDS];
21486 +
21487 + ktime_t hrtimer_exp; /* switch timeout reference */
21488 + ktime_t hrtimer_rem; /* per-thread remainder timeout */
21489 +
21490 + u64 duration_start; /* start time in ns */
21491 + u64 duration; /* total active ns */
21492 +};
21493 +
21494 +/*
21495 + * common private event set flags (priv_flags)
21496 + *
21497 + * upper 16 bits: for arch-specific use
21498 + * lower 16 bits: for common use
21499 + */
21500 +#define PFM_SETFL_PRIV_MOD_PMDS 0x1 /* PMD register(s) modified */
21501 +#define PFM_SETFL_PRIV_MOD_PMCS 0x2 /* PMC register(s) modified */
21502 +#define PFM_SETFL_PRIV_SWITCH 0x4 /* must switch set on restart */
21503 +#define PFM_SETFL_PRIV_MOD_BOTH (PFM_SETFL_PRIV_MOD_PMDS \
21504 + | PFM_SETFL_PRIV_MOD_PMCS)
21505 +
21506 +/*
21507 + * context flags
21508 + */
21509 +struct pfm_context_flags {
21510 + unsigned int block:1; /* task blocks on user notifications */
21511 + unsigned int system:1; /* do system wide monitoring */
21512 + unsigned int no_msg:1; /* no message sent on overflow */
21513 + unsigned int switch_ovfl:1; /* switch set on counter ovfl */
21514 + unsigned int switch_time:1; /* switch set on timeout */
21515 + unsigned int started:1; /* pfm_start() issued */
21516 + unsigned int work_type:2; /* type of work for pfm_handle_work */
21517 + unsigned int mmap_nlock:1; /* no lock in pfm_release_buf_space */
21518 + unsigned int ia64_v20_compat:1; /* context is IA-64 v2.0 mode */
21519 + unsigned int can_restart:8; /* allowed to issue a PFM_RESTART */
21520 + unsigned int reset_count:8; /* number of pending resets */
21521 + unsigned int is_self:1; /* per-thread and self-monitoring */
21522 + unsigned int reserved:5; /* for future use */
21523 +};
21524 +
21525 +/*
21526 + * values for work_type (TIF_PERFMON_WORK must be set)
21527 + */
21528 +#define PFM_WORK_NONE 0 /* nothing to do */
21529 +#define PFM_WORK_RESET 1 /* reset overflowed counters */
21530 +#define PFM_WORK_BLOCK 2 /* block current thread */
21531 +#define PFM_WORK_ZOMBIE 3 /* cleanup zombie context */
21532 +
21533 +/*
21534 + * overflow description argument passed to sampling format
21535 + */
21536 +struct pfm_ovfl_arg {
21537 + u16 ovfl_pmd; /* index of overflowed PMD */
21538 + u16 active_set; /* set active at the time of the overflow */
21539 + u32 ovfl_ctrl; /* control flags */
21540 + u64 pmd_last_reset; /* last reset value of overflowed PMD */
21541 + u64 smpl_pmds_values[PFM_MAX_PMDS]; /* values of other PMDs */
21542 + u64 pmd_eventid; /* eventid associated with PMD */
21543 + u16 num_smpl_pmds; /* number of PMDs in smpl_pmds_values */
21544 +};
21545 +/*
21546 + * depth of message queue
21547 + *
21548 + * Depth cannot be bigger than 255 (see reset_count)
21549 + */
21550 +#define PFM_MSGS_ORDER 3 /* log2(number of messages) */
21551 +#define PFM_MSGS_COUNT (1<<PFM_MSGS_ORDER) /* number of messages */
21552 +#define PFM_MSGQ_MASK (PFM_MSGS_COUNT-1)
21553 +
21554 +/*
21555 + * perfmon context state
21556 + */
21557 +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
21558 +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
21559 +#define PFM_CTX_MASKED 3 /* context is loaded, monitoring is masked */
21560 +#define PFM_CTX_ZOMBIE 4 /* context lost owner but still attached */
21561 +
21562 +/*
21563 + * registers description
21564 + */
21565 +struct pfm_regdesc {
21566 + u64 pmcs[PFM_PMC_BV]; /* available PMC */
21567 + u64 pmds[PFM_PMD_BV]; /* available PMD */
21568 + u64 rw_pmds[PFM_PMD_BV]; /* available RW PMD */
21569 + u64 intr_pmds[PFM_PMD_BV]; /* PMD generating intr */
21570 + u64 cnt_pmds[PFM_PMD_BV]; /* PMD counters */
21571 + u16 max_pmc; /* highest+1 avail PMC */
21572 + u16 max_pmd; /* highest+1 avail PMD */
21573 + u16 max_rw_pmd; /* highest+1 avail RW PMD */
21574 + u16 first_intr_pmd; /* first intr PMD */
21575 + u16 max_intr_pmd; /* highest+1 intr PMD */
21576 + u16 num_rw_pmd; /* number of avail RW PMD */
21577 + u16 num_pmcs; /* number of logical PMCS */
21578 + u16 num_pmds; /* number of logical PMDS */
21579 + u16 num_counters; /* number of counting PMD */
21580 +};
21581 +
21582 +/*
21583 + * context: contains all the state of a session
21584 + */
21585 +struct pfm_context {
21586 + spinlock_t lock; /* context protection */
21587 +
21588 + struct pfm_context_flags flags;
21589 + u32 state; /* current state */
21590 + struct task_struct *task; /* attached task */
21591 +
21592 + struct completion restart_complete;/* block on notification */
21593 + u64 last_act; /* last activation */
21594 + u32 last_cpu; /* last CPU used (SMP only) */
21595 + u32 cpu; /* cpu bound to context */
21596 +
21597 + struct pfm_smpl_fmt *smpl_fmt; /* sampling format callbacks */
21598 + void *smpl_addr; /* user smpl buffer base */
21599 + size_t smpl_size; /* user smpl buffer size */
21600 + void *smpl_real_addr;/* actual smpl buffer base */
21601 + size_t smpl_real_size; /* actual smpl buffer size */
21602 +
21603 + wait_queue_head_t msgq_wait; /* pfm_read() wait queue */
21604 +
21605 + union pfarg_msg msgq[PFM_MSGS_COUNT];
21606 + int msgq_head;
21607 + int msgq_tail;
21608 +
21609 + struct fasync_struct *async_queue; /* async notification */
21610 +
21611 + struct pfm_event_set *active_set; /* active set */
21612 + struct list_head set_list; /* ordered list of sets */
21613 +
21614 + struct pfm_regdesc regs; /* registers available to context */
21615 +
21616 + /*
21617 + * save stack space by allocating temporary variables for
21618 + * pfm_overflow_handler() in pfm_context
21619 + */
21620 + struct pfm_ovfl_arg ovfl_arg;
21621 + u64 tmp_ovfl_notify[PFM_PMD_BV];
21622 +};
21623 +
21624 +/*
21625 + * ovfl_ctrl bitmask (used by interrupt handler)
21626 + */
21627 +#define PFM_OVFL_CTRL_NOTIFY 0x1 /* notify user */
21628 +#define PFM_OVFL_CTRL_RESET 0x2 /* reset overflowed pmds */
21629 +#define PFM_OVFL_CTRL_MASK 0x4 /* mask monitoring */
21630 +#define PFM_OVFL_CTRL_SWITCH 0x8 /* switch sets */
21631 +
21632 +/*
21633 + * logging
21634 + */
21635 +#define PFM_ERR(f, x...) printk(KERN_ERR "perfmon: " f "\n", ## x)
21636 +#define PFM_WARN(f, x...) printk(KERN_WARNING "perfmon: " f "\n", ## x)
21637 +#define PFM_LOG(f, x...) printk(KERN_NOTICE "perfmon: " f "\n", ## x)
21638 +#define PFM_INFO(f, x...) printk(KERN_INFO "perfmon: " f "\n", ## x)
21639 +
21640 +/*
21641 + * debugging
21642 + *
21643 + * Printk rate limiting is enforced to avoid getting flooded with too many
21644 + * error messages on the console (which could render the machine unresponsive).
21645 + * To get full debug output (turn off ratelimit):
21646 + * $ echo 0 >/proc/sys/kernel/printk_ratelimit
21647 + *
21648 + * debug is a bitmask where bits are defined as follows:
21649 + * bit 0: enable non-interrupt code debug messages
21650 + * bit 1: enable interrupt code debug messages
21651 + */
21652 +#ifdef CONFIG_PERFMON_DEBUG
21653 +#define _PFM_DBG(lm, f, x...) \
21654 + do { \
21655 + if (unlikely((pfm_controls.debug & (lm)) && printk_ratelimit())) { \
21656 + preempt_disable(); \
21657 + printk("perfmon: %s.%d: CPU%d [%d]: " f "\n", \
21658 + __func__, __LINE__, \
21659 + smp_processor_id(), current->pid , ## x); \
21660 + preempt_enable(); \
21661 + } \
21662 + } while (0)
21663 +
21664 +#define PFM_DBG(f, x...) _PFM_DBG(0x1, f, ##x)
21665 +#define PFM_DBG_ovfl(f, x...) _PFM_DBG(0x2, f, ## x)
21666 +#else
21667 +#define PFM_DBG(f, x...) do {} while (0)
21668 +#define PFM_DBG_ovfl(f, x...) do {} while (0)
21669 +#endif
21670 +
21671 +extern struct pfm_pmu_config *pfm_pmu_conf;
21672 +extern int perfmon_disabled;
21673 +
21674 +static inline struct pfm_arch_context *pfm_ctx_arch(struct pfm_context *c)
21675 +{
21676 + return (struct pfm_arch_context *)(c+1);
21677 +}
21678 +
21679 +int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
21680 + void **req, void **to_free);
21681 +
21682 +int pfm_get_smpl_arg(char __user *fmt_uname, void __user *uaddr, size_t usize,
21683 + void **arg, struct pfm_smpl_fmt **fmt);
21684 +
21685 +int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req,
21686 + int count);
21687 +int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
21688 + int compat);
21689 +int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count);
21690 +
21691 +int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *req,
21692 + struct task_struct *task);
21693 +int __pfm_unload_context(struct pfm_context *ctx, int *can_release);
21694 +
21695 +int __pfm_stop(struct pfm_context *ctx, int *release_info);
21696 +int __pfm_restart(struct pfm_context *ctx, int *unblock);
21697 +int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start);
21698 +
21699 +void pfm_free_context(struct pfm_context *ctx);
21700 +
21701 +void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size);
21702 +
21703 +int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
21704 + unsigned long *flags, void **resume);
21705 +/*
21706 + * check_mask bitmask values for pfm_check_task_state()
21707 + */
21708 +#define PFM_CMD_STOPPED 0x01 /* command needs thread stopped */
21709 +#define PFM_CMD_UNLOADED 0x02 /* command needs ctx unloaded */
21710 +#define PFM_CMD_UNLOAD 0x04 /* command is unload */
21711 +
21712 +int __pfm_create_context(struct pfarg_ctx *req,
21713 + struct pfm_smpl_fmt *fmt,
21714 + void *fmt_arg,
21715 + int mode,
21716 + struct pfm_context **new_ctx);
21717 +
21718 +struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id,
21719 + int alloc);
21720 +
21721 +int pfm_pmu_conf_get(int autoload);
21722 +void pfm_pmu_conf_put(void);
21723 +
21724 +int pfm_session_allcpus_acquire(void);
21725 +void pfm_session_allcpus_release(void);
21726 +
21727 +int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize);
21728 +void pfm_smpl_buf_free(struct pfm_context *ctx);
21729 +
21730 +struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name);
21731 +void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt);
21732 +
21733 +void pfm_interrupt_handler(unsigned long iip, struct pt_regs *regs);
21734 +
21735 +void pfm_resume_task(struct task_struct *t, void *data);
21736 +
21737 +#include <linux/perfmon_pmu.h>
21738 +#include <linux/perfmon_fmt.h>
21739 +
21740 +extern const struct file_operations pfm_file_ops;
21741 +/*
21742 + * upper limit for count in calls that take vector arguments. This is used
21743 + * to prevent multiplication overflow when we compute actual storage size
21744 + */
21745 +#define PFM_MAX_ARG_COUNT(m) (INT_MAX/sizeof(*(m)))
21746 +
21747 +#define cast_ulp(_x) ((unsigned long *)_x)
21748 +
21749 +#define PFM_NORMAL 0
21750 +#define PFM_COMPAT 1
21751 +
21752 +void __pfm_exit_thread(void);
21753 +void pfm_ctxsw_in(struct task_struct *prev, struct task_struct *next);
21754 +void pfm_ctxsw_out(struct task_struct *prev, struct task_struct *next);
21755 +void pfm_handle_work(struct pt_regs *regs);
21756 +void __pfm_init_percpu(void *dummy);
21757 +void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
21758 +
21759 +static inline void pfm_exit_thread(void)
21760 +{
21761 + if (current->pfm_context)
21762 + __pfm_exit_thread();
21763 +}
21764 +
21765 +/*
21766 + * include arch-specific kernel level definitions
21767 + */
21768 +#include <asm/perfmon_kern.h>
21769 +
21770 +static inline void pfm_copy_thread(struct task_struct *task)
21771 +{
21772 + /*
21773 + * context or perfmon TIF state is NEVER inherited
21774 + * in child task. Holds for per-thread and system-wide
21775 + */
21776 + task->pfm_context = NULL;
21777 + clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
21778 + clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
21779 + pfm_arch_disarm_handle_work(task);
21780 +}
21781 +
21782 +
21783 +/*
21784 + * read a single PMD register.
21785 + *
21786 + * virtual PMD registers have special handler.
21787 + * Depends on definitions in asm/perfmon_kern.h
21788 + */
21789 +static inline u64 pfm_read_pmd(struct pfm_context *ctx, unsigned int cnum)
21790 +{
21791 + if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V))
21792 + return pfm_pmu_conf->pmd_sread(ctx, cnum);
21793 +
21794 + return pfm_arch_read_pmd(ctx, cnum);
21795 +}
21796 +/*
21797 + * write a single PMD register.
21798 + *
21799 + * virtual PMD registers have special handler.
21800 + * Depends on definitions in asm/perfmon_kern.h
21801 + */
21802 +static inline void pfm_write_pmd(struct pfm_context *ctx, unsigned int cnum,
21803 + u64 value)
21804 +{
21805 + /*
21806 + * PMD writes are ignored for read-only registers
21807 + */
21808 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_RO)
21809 + return;
21810 +
21811 + if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V) {
21812 + pfm_pmu_conf->pmd_swrite(ctx, cnum, value);
21813 + return;
21814 + }
21815 + /*
21816 + * clear unimplemented bits
21817 + */
21818 + value &= ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
21819 +
21820 + pfm_arch_write_pmd(ctx, cnum, value);
21821 +}
21822 +
21823 +void __pfm_init_percpu(void *dummy);
21824 +
21825 +static inline void pfm_init_percpu(void)
21826 +{
21827 + __pfm_init_percpu(NULL);
21828 +}
21829 +
21830 +/*
21831 + * pfm statistics are available via debugfs
21832 + * and perfmon subdir.
21833 + *
21834 + * When adding/removing new stats, make sure you also
21835 + * update the name table in perfmon_debugfs.c
21836 + */
21837 +enum pfm_stats_names {
21838 + PFM_ST_ovfl_intr_all_count = 0,
21839 + PFM_ST_ovfl_intr_ns,
21840 + PFM_ST_ovfl_intr_spurious_count,
21841 + PFM_ST_ovfl_intr_replay_count,
21842 + PFM_ST_ovfl_intr_regular_count,
21843 + PFM_ST_handle_work_count,
21844 + PFM_ST_ovfl_notify_count,
21845 + PFM_ST_reset_pmds_count,
21846 + PFM_ST_pfm_restart_count,
21847 + PFM_ST_fmt_handler_calls,
21848 + PFM_ST_fmt_handler_ns,
21849 + PFM_ST_set_switch_count,
21850 + PFM_ST_set_switch_ns,
21851 + PFM_ST_set_switch_exp,
21852 + PFM_ST_ctxswin_count,
21853 + PFM_ST_ctxswin_ns,
21854 + PFM_ST_handle_timeout_count,
21855 + PFM_ST_ovfl_intr_nmi_count,
21856 + PFM_ST_ctxswout_count,
21857 + PFM_ST_ctxswout_ns,
21858 + PFM_ST_LAST /* last entry marked */
21859 +};
21860 +#define PFM_NUM_STATS PFM_ST_LAST
21861 +
21862 +struct pfm_stats {
21863 + u64 v[PFM_NUM_STATS];
21864 + struct dentry *dirs[PFM_NUM_STATS];
21865 + struct dentry *cpu_dir;
21866 + char cpu_name[8];
21867 +};
21868 +
21869 +#ifdef CONFIG_PERFMON_DEBUG_FS
21870 +#define pfm_stats_get(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]
21871 +#define pfm_stats_inc(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]++
21872 +#define pfm_stats_add(x, y) __get_cpu_var(pfm_stats).v[PFM_ST_##x] += (y)
21873 +void pfm_reset_stats(int cpu);
21874 +#else
21875 +#define pfm_stats_get(x)
21876 +#define pfm_stats_inc(x)
21877 +#define pfm_stats_add(x, y)
21878 +static inline void pfm_reset_stats(int cpu)
21879 +{}
21880 +#endif
21881 +
21882 +
21883 +
21884 +DECLARE_PER_CPU(struct pfm_context *, pmu_ctx);
21885 +DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
21886 +DECLARE_PER_CPU(struct task_struct *, pmu_owner);
21887 +
21888 +void pfm_cpu_disable(void);
21889 +
21890 +
21891 +/*
21892 + * max vector argument elements for local storage (no kmalloc/kfree)
21893 + * The PFM_ARCH_PM*_STK_ARG macros should be defined in asm/perfmon_kern.h.
21894 + * If not, default (conservative) values are used
21895 + */
21896 +#ifndef PFM_ARCH_PMC_STK_ARG
21897 +#define PFM_ARCH_PMC_STK_ARG 1
21898 +#endif
21899 +
21900 +#ifndef PFM_ARCH_PMD_STK_ARG
21901 +#define PFM_ARCH_PMD_STK_ARG 1
21902 +#endif
21903 +
21904 +#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
21905 +#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
21906 +
21907 +#else /* !CONFIG_PERFMON */
21908 +
21909 +
21910 +/*
21911 + * perfmon hooks are nops when CONFIG_PERFMON is undefined
21912 + */
21913 +static inline void pfm_cpu_disable(void)
21914 +{}
21915 +
21916 +static inline void pfm_exit_thread(void)
21917 +{}
21918 +
21919 +static inline void pfm_handle_work(struct pt_regs *regs)
21920 +{}
21921 +
21922 +static inline void pfm_copy_thread(struct task_struct *t)
21923 +{}
21924 +
21925 +static inline void pfm_ctxsw_in(struct task_struct *p, struct task_struct *n)
21926 +{}
21927 +
21928 +static inline void pfm_ctxsw_out(struct task_struct *p, struct task_struct *n)
21929 +{}
21930 +
21931 +static inline void pfm_session_allcpus_release(void)
21932 +{}
21933 +
21934 +static inline int pfm_session_allcpus_acquire(void)
21935 +{
21936 + return 0;
21937 +}
21938 +
21939 +static inline void pfm_init_percpu(void)
21940 +{}
21941 +
21942 +#endif /* CONFIG_PERFMON */
21943 +
21944 +#endif /* __LINUX_PERFMON_KERN_H__ */
21945 diff --git a/include/linux/perfmon_pmu.h b/include/linux/perfmon_pmu.h
21946 new file mode 100644
21947 index 0000000..3f5f9e8
21948 --- /dev/null
21949 +++ b/include/linux/perfmon_pmu.h
21950 @@ -0,0 +1,192 @@
21951 +/*
21952 + * Copyright (c) 2006 Hewlett-Packard Development Company, L.P.
21953 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
21954 + *
21955 + * Interface for PMU description modules
21956 + *
21957 + * This program is free software; you can redistribute it and/or
21958 + * modify it under the terms of version 2 of the GNU General Public
21959 + * License as published by the Free Software Foundation.
21960 + *
21961 + * This program is distributed in the hope that it will be useful,
21962 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
21963 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21964 + * General Public License for more details.
21965 + *
21966 + * You should have received a copy of the GNU General Public License
21967 + * along with this program; if not, write to the Free Software
21968 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21969 + * 02111-1307 USA
21970 + */
21971 +#ifndef __PERFMON_PMU_H__
21972 +#define __PERFMON_PMU_H__ 1
21973 +
21974 +/*
21975 + * generic information about a PMC or PMD register
21976 + *
21977 + * Dependency bitmasks:
21978 + * They are used to allow lazy save/restore in the context switch
21979 + * code. To avoid picking up stale configuration from a previous
21980 + * thread, the generic read/write routines use the bitmask to
21981 + * ensure that all registers needed to support the measurement are
21982 + * restored properly on context switch in.
21983 + */
21984 +struct pfm_regmap_desc {
21985 + u16 type; /* role of the register */
21986 + u16 reserved1; /* for future use */
21987 + u32 reserved2; /* for future use */
21988 + u64 dfl_val; /* power-on default value (quiescent) */
21989 + u64 rsvd_msk; /* reserved bits: 1 means reserved */
21990 + u64 no_emul64_msk; /* bits to clear for PFM_REGFL_NO_EMUL64 */
21991 + unsigned long hw_addr; /* HW register address or index */
21992 + struct kobject kobj; /* for internal use only */
21993 + char *desc; /* HW register description string */
21994 + u64 dep_pmcs[PFM_PMC_BV];/* depending PMC registers */
21995 +};
21996 +#define to_reg(n) container_of(n, struct pfm_regmap_desc, kobj)
21997 +
21998 +/*
21999 + * pfm_regmap_desc helper macros
22000 + */
22001 +#define PMC_D(t, d, v, r, n, h) \
22002 + { .type = t, \
22003 + .desc = d, \
22004 + .dfl_val = v, \
22005 + .rsvd_msk = r, \
22006 + .no_emul64_msk = n, \
22007 + .hw_addr = h \
22008 + }
22009 +
22010 +#define PMD_D(t, d, h) \
22011 + { .type = t, \
22012 + .desc = d, \
22013 + .rsvd_msk = 0, \
22014 + .no_emul64_msk = 0, \
22015 + .hw_addr = h \
22016 + }
22017 +
22018 +#define PMD_DR(t, d, h, r) \
22019 + { .type = t, \
22020 + .desc = d, \
22021 + .rsvd_msk = r, \
22022 + .no_emul64_msk = 0, \
22023 + .hw_addr = h \
22024 + }
22025 +
22026 +#define PMX_NA \
22027 + { .type = PFM_REG_NA }
22028 +
22029 +#define PMD_DP(t, d, h, p) \
22030 + { .type = t, \
22031 + .desc = d, \
22032 + .rsvd_msk = 0, \
22033 + .no_emul64_msk = 0, \
22034 + .dep_pmcs[0] = p, \
22035 + .hw_addr = h \
22036 + }
22037 +
22038 +/*
22039 + * type of a PMU register (16-bit bitmask) for use with pfm_regmap_desc.type
22040 + */
22041 +#define PFM_REG_NA 0x00 /* not avail. (not impl.,no access) must be 0 */
22042 +#define PFM_REG_I 0x01 /* PMC/PMD: implemented */
22043 +#define PFM_REG_WC 0x02 /* PMC: has write_checker */
22044 +#define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */
22045 +#define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */
22046 +#define PFM_REG_V 0x10 /* PMD: virtual reg */
22047 +#define PFM_REG_INTR 0x20 /* PMD: register can generate interrupt */
22048 +#define PFM_REG_SYS 0x40 /* PMC/PMD: register is for system-wide only */
22049 +#define PFM_REG_THR 0x80 /* PMC/PMD: register is for per-thread only */
22050 +#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */
22051 +
22052 +/*
22053 + * define some shortcuts for common types
22054 + */
22055 +#define PFM_REG_W (PFM_REG_WC|PFM_REG_I)
22056 +#define PFM_REG_W64 (PFM_REG_WC|PFM_REG_NO64|PFM_REG_I)
22057 +#define PFM_REG_C (PFM_REG_C64|PFM_REG_INTR|PFM_REG_I)
22058 +#define PFM_REG_I64 (PFM_REG_NO64|PFM_REG_I)
22059 +#define PFM_REG_IRO (PFM_REG_I|PFM_REG_RO)
22060 +
22061 +typedef int (*pfm_pmc_check_t)(struct pfm_context *ctx,
22062 + struct pfm_event_set *set,
22063 + struct pfarg_pmc *req);
22064 +
22065 +typedef int (*pfm_pmd_check_t)(struct pfm_context *ctx,
22066 + struct pfm_event_set *set,
22067 + struct pfarg_pmd *req);
22068 +
22069 +
22070 +typedef u64 (*pfm_sread_t)(struct pfm_context *ctx, unsigned int cnum);
22071 +typedef void (*pfm_swrite_t)(struct pfm_context *ctx, unsigned int cnum, u64 val);
22072 +
22073 +/*
22074 + * structure used by pmu description modules
22075 + *
22076 + * probe_pmu() routine return value:
22077 + * - 1 means recognized PMU
22078 + * - 0 means not recognized PMU
22079 + */
22080 +struct pfm_pmu_config {
22081 + char *pmu_name; /* PMU family name */
22082 + char *version; /* config module version */
22083 +
22084 + int counter_width; /* width of hardware counter */
22085 +
22086 + struct pfm_regmap_desc *pmc_desc; /* PMC register descriptions */
22087 + struct pfm_regmap_desc *pmd_desc; /* PMD register descriptions */
22088 +
22089 + pfm_pmc_check_t pmc_write_check;/* write checker (optional) */
22090 + pfm_pmd_check_t pmd_write_check;/* write checker (optional) */
22091 + pfm_pmd_check_t pmd_read_check; /* read checker (optional) */
22092 +
22093 + pfm_sread_t pmd_sread; /* virtual pmd read */
22094 + pfm_swrite_t pmd_swrite; /* virtual pmd write */
22095 +
22096 + int (*probe_pmu)(void);/* probe PMU routine */
22097 +
22098 + u16 num_pmc_entries;/* #entries in pmc_desc */
22099 + u16 num_pmd_entries;/* #entries in pmd_desc */
22100 +
22101 + void *pmu_info; /* model-specific infos */
22102 + u32 flags; /* set of flags */
22103 +
22104 + struct module *owner; /* pointer to module struct */
22105 +
22106 + /*
22107 + * fields computed internally, do not set in module
22108 + */
22109 + struct pfm_regdesc regs_all; /* regs available to all */
22110 + struct pfm_regdesc regs_thr; /* regs avail per-thread */
22111 + struct pfm_regdesc regs_sys; /* regs avail system-wide */
22112 +
22113 + u64 ovfl_mask; /* overflow mask */
22114 +};
22115 +
22116 +static inline void *pfm_pmu_info(void)
22117 +{
22118 + return pfm_pmu_conf->pmu_info;
22119 +}
22120 +
22121 +/*
22122 + * pfm_pmu_config flags
22123 + */
22124 +#define PFM_PMUFL_IS_BUILTIN 0x1 /* pmu config is compiled in */
22125 +
22126 +/*
22127 + * we need to know whether the PMU description is builtin or compiled
22128 + * as a module
22129 + */
22130 +#ifdef MODULE
22131 +#define PFM_PMU_BUILTIN_FLAG 0 /* built as a module */
22132 +#else
22133 +#define PFM_PMU_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* compiled in (not a module) */
22134 +#endif
22135 +
22136 +int pfm_pmu_register(struct pfm_pmu_config *cfg);
22137 +void pfm_pmu_unregister(struct pfm_pmu_config *cfg);
22138 +
22139 +int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu);
22140 +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
22141 +
22142 +#endif /* __PERFMON_PMU_H__ */
22143 diff --git a/include/linux/sched.h b/include/linux/sched.h
22144 index 3d9120c..8fb3b55 100644
22145 --- a/include/linux/sched.h
22146 +++ b/include/linux/sched.h
22147 @@ -96,6 +96,7 @@ struct exec_domain;
22148 struct futex_pi_state;
22149 struct robust_list_head;
22150 struct bio;
22151 +struct pfm_context;
22152
22153 /*
22154 * List of flags we want to share for kernel threads,
22155 @@ -1301,6 +1302,9 @@ struct task_struct {
22156 int latency_record_count;
22157 struct latency_record latency_record[LT_SAVECOUNT];
22158 #endif
22159 +#ifdef CONFIG_PERFMON
22160 + struct pfm_context *pfm_context;
22161 +#endif
22162 };
22163
22164 /*
22165 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
22166 index d6ff145..e308523 100644
22167 --- a/include/linux/syscalls.h
22168 +++ b/include/linux/syscalls.h
22169 @@ -29,6 +29,13 @@ struct msqid_ds;
22170 struct new_utsname;
22171 struct nfsctl_arg;
22172 struct __old_kernel_stat;
22173 +struct pfarg_ctx;
22174 +struct pfarg_pmc;
22175 +struct pfarg_pmd;
22176 +struct pfarg_start;
22177 +struct pfarg_load;
22178 +struct pfarg_setinfo;
22179 +struct pfarg_setdesc;
22180 struct pollfd;
22181 struct rlimit;
22182 struct rusage;
22183 @@ -625,4 +632,27 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
22184
22185 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
22186
22187 +asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq,
22188 + void __user *uarg, size_t smpl_size);
22189 +asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq,
22190 + int count);
22191 +asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq,
22192 + int count);
22193 +asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq,
22194 + int count);
22195 +asmlinkage long sys_pfm_restart(int fd);
22196 +asmlinkage long sys_pfm_stop(int fd);
22197 +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq);
22198 +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq);
22199 +asmlinkage long sys_pfm_unload_context(int fd);
22200 +asmlinkage long sys_pfm_delete_evtsets(int fd,
22201 + struct pfarg_setinfo __user *ureq,
22202 + int count);
22203 +asmlinkage long sys_pfm_create_evtsets(int fd,
22204 + struct pfarg_setdesc __user *ureq,
22205 + int count);
22206 +asmlinkage long sys_pfm_getinfo_evtsets(int fd,
22207 + struct pfarg_setinfo __user *ureq,
22208 + int count);
22209 +
22210 #endif
22211 diff --git a/kernel/sched.c b/kernel/sched.c
22212 index ad1962d..1bc8fcf 100644
22213 --- a/kernel/sched.c
22214 +++ b/kernel/sched.c
22215 @@ -71,6 +71,7 @@
22216 #include <linux/debugfs.h>
22217 #include <linux/ctype.h>
22218 #include <linux/ftrace.h>
22219 +#include <linux/perfmon_kern.h>
22220
22221 #include <asm/tlb.h>
22222 #include <asm/irq_regs.h>
22223 diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
22224 index 08d6e1b..61f4155 100644
22225 --- a/kernel/sys_ni.c
22226 +++ b/kernel/sys_ni.c
22227 @@ -126,6 +126,19 @@ cond_syscall(sys_vm86);
22228 cond_syscall(compat_sys_ipc);
22229 cond_syscall(compat_sys_sysctl);
22230
22231 +cond_syscall(sys_pfm_create_context);
22232 +cond_syscall(sys_pfm_write_pmcs);
22233 +cond_syscall(sys_pfm_write_pmds);
22234 +cond_syscall(sys_pfm_read_pmds);
22235 +cond_syscall(sys_pfm_restart);
22236 +cond_syscall(sys_pfm_start);
22237 +cond_syscall(sys_pfm_stop);
22238 +cond_syscall(sys_pfm_load_context);
22239 +cond_syscall(sys_pfm_unload_context);
22240 +cond_syscall(sys_pfm_create_evtsets);
22241 +cond_syscall(sys_pfm_delete_evtsets);
22242 +cond_syscall(sys_pfm_getinfo_evtsets);
22243 +
22244 /* arch-specific weak syscall entries */
22245 cond_syscall(sys_pciconfig_read);
22246 cond_syscall(sys_pciconfig_write);
22247 diff --git a/perfmon/Makefile b/perfmon/Makefile
22248 new file mode 100644
22249 index 0000000..32ff037
22250 --- /dev/null
22251 +++ b/perfmon/Makefile
22252 @@ -0,0 +1,12 @@
22253 +#
22254 +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
22255 +# Contributed by Stephane Eranian <eranian@hpl.hp.com>
22256 +#
22257 +obj-y = perfmon_init.o perfmon_rw.o perfmon_res.o \
22258 + perfmon_pmu.o perfmon_sysfs.o perfmon_syscalls.o \
22259 + perfmon_file.o perfmon_ctxsw.o perfmon_intr.o \
22260 + perfmon_dfl_smpl.o perfmon_sets.o perfmon_hotplug.o \
22261 + perfmon_msg.o perfmon_smpl.o perfmon_attach.o \
22262 + perfmon_activate.o perfmon_ctx.o perfmon_fmt.o
22263 +
22264 +obj-$(CONFIG_PERFMON_DEBUG_FS) += perfmon_debugfs.o
22265 diff --git a/perfmon/perfmon_activate.c b/perfmon/perfmon_activate.c
22266 new file mode 100644
22267 index 0000000..d9f501d
22268 --- /dev/null
22269 +++ b/perfmon/perfmon_activate.c
22270 @@ -0,0 +1,265 @@
22271 +/*
22272 + * perfmon_activate.c: perfmon2 start/stop functions
22273 + *
22274 + * This file implements the perfmon2 interface which
22275 + * provides access to the hardware performance counters
22276 + * of the host processor.
22277 + *
22278 + *
22279 + * The initial version of perfmon.c was written by
22280 + * Ganesh Venkitachalam, IBM Corp.
22281 + *
22282 + * Then it was modified for perfmon-1.x by Stephane Eranian and
22283 + * David Mosberger, Hewlett Packard Co.
22284 + *
22285 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
22286 + * by Stephane Eranian, Hewlett Packard Co.
22287 + *
22288 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
22289 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
22290 + * David Mosberger-Tang <davidm@hpl.hp.com>
22291 + *
22292 + * More information about perfmon available at:
22293 + * http://perfmon2.sf.net
22294 + *
22295 + * This program is free software; you can redistribute it and/or
22296 + * modify it under the terms of version 2 of the GNU General Public
22297 + * License as published by the Free Software Foundation.
22298 + *
22299 + * This program is distributed in the hope that it will be useful,
22300 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22301 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22302 + * General Public License for more details.
22303 + *
22304 + * You should have received a copy of the GNU General Public License
22305 + * along with this program; if not, write to the Free Software
22306 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22307 + * 02111-1307 USA
22308 + */
22309 +#include <linux/kernel.h>
22310 +#include <linux/perfmon_kern.h>
22311 +#include "perfmon_priv.h"
22312 +
22313 +/**
22314 + * __pfm_start - activate monitoring
22315 + * @ctx: context to operate on
22316 + * @start: pfarg_start as passed by user
22317 + *
22318 + * When operating in per-thread mode and not self-monitoring, the monitored
22319 + * thread must be stopped. Activation will be effective next time the thread
22320 + * is context switched in.
22321 + *
22322 + * The pfarg_start argument is optional and may be used to designate
22323 + * the initial event set to activate. When not provided, the last active
22324 + * set is used. For the first activation, set0 is used when start is NULL.
22325 + *
22326 + * On some architectures, e.g., IA-64, it may be possible to start monitoring
22327 + * without calling this function under certain conditions (per-thread and self
22328 + * monitoring). In this case, either set0 or the last active set is used.
22329 + *
22330 + * the context is locked and interrupts are disabled.
22331 + */
22332 +int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start)
22333 +{
22334 + struct task_struct *task, *owner_task;
22335 + struct pfm_event_set *new_set, *old_set;
22336 + int is_self;
22337 +
22338 + task = ctx->task;
22339 +
22340 + /*
22341 + * UNLOADED: error
22342 + * LOADED : normal start, nop if started unless set is different
22343 + * MASKED : nop or change set when unmasking
22344 + * ZOMBIE : cannot happen
22345 + */
22346 + if (ctx->state == PFM_CTX_UNLOADED)
22347 + return -EINVAL;
22348 +
22349 + old_set = new_set = ctx->active_set;
22350 +
22351 + /*
22352 + * always the case for system-wide
22353 + */
22354 + if (task == NULL)
22355 + task = current;
22356 +
22357 + is_self = task == current;
22358 +
22359 + /*
22360 + * argument is provided?
22361 + */
22362 + if (start) {
22363 + /*
22364 + * find the set to load first
22365 + */
22366 + new_set = pfm_find_set(ctx, start->start_set, 0);
22367 + if (new_set == NULL) {
22368 + PFM_DBG("event set%u does not exist",
22369 + start->start_set);
22370 + return -EINVAL;
22371 + }
22372 + }
22373 +
22374 + PFM_DBG("cur_set=%u req_set=%u", old_set->id, new_set->id);
22375 +
22376 + /*
22377 + * if we need to change the active set we need
22378 + * to check if we can access the PMU
22379 + */
22380 + if (new_set != old_set) {
22381 +
22382 + owner_task = __get_cpu_var(pmu_owner);
22383 + /*
22384 + * system-wide: must run on the right CPU
22385 + * per-thread : must be the owner of the PMU context
22386 + *
22387 + * pfm_switch_sets() returns with monitoring stopped
22388 + */
22389 + if (is_self) {
22390 + pfm_switch_sets(ctx, new_set, PFM_PMD_RESET_LONG, 1);
22391 + } else {
22392 + /*
22393 + * In a UP kernel, the PMU may contain the state
22394 + * of the task we want to operate on, yet the task
22395 + * may be switched out (lazy save). We need to save
22396 + * current state (old_set), switch active_set and
22397 + * mark it for reload.
22398 + */
22399 + if (owner_task == task)
22400 + pfm_save_pmds(ctx, old_set);
22401 + ctx->active_set = new_set;
22402 + new_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
22403 + }
22404 + }
22405 +
22406 + /*
22407 + * mark as started
22408 + * must be done before calling pfm_arch_start()
22409 + */
22410 + ctx->flags.started = 1;
22411 +
22412 + pfm_arch_start(task, ctx);
22413 +
22414 + /*
22415 + * we check whether we had a pending ovfl before restarting.
22416 + * If so we need to regenerate the interrupt to make sure we
22417 + * keep recorded samples. For non-self monitoring this check
22418 + * is done in the pfm_ctxswin_thread() routine.
22419 + *
22420 + * we check new_set/old_set because pfm_switch_sets() already
22421 + * takes care of replaying the pending interrupts
22422 + */
22423 + if (is_self && new_set != old_set && new_set->npend_ovfls) {
22424 + pfm_arch_resend_irq(ctx);
22425 + pfm_stats_inc(ovfl_intr_replay_count);
22426 + }
22427 +
22428 + /*
22429 + * always start with full timeout
22430 + */
22431 + new_set->hrtimer_rem = new_set->hrtimer_exp;
22432 +
22433 + /*
22434 + * activate timeout for system-wide, self-monitoring
22435 + * Always start with full timeout
22436 + * Timeout is at least one tick away, so no risk of
22437 + * having hrtimer_start() trying to wakeup softirqd
22438 + * and thus causing troubles. This cannot happen anyway
22439 + * because cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
22440 + */
22441 + if (is_self && new_set->flags & PFM_SETFL_TIME_SWITCH) {
22442 + hrtimer_start(&__get_cpu_var(pfm_hrtimer),
22443 + new_set->hrtimer_rem,
22444 + HRTIMER_MODE_REL);
22445 +
22446 + PFM_DBG("set%u started timeout=%lld",
22447 + new_set->id,
22448 + (unsigned long long)new_set->hrtimer_rem.tv64);
22449 + }
22450 +
22451 + /*
22452 + * we restart total duration even if context was
22453 + * already started. In that case, counts are simply
22454 + * reset.
22455 + *
22456 + * For per-thread, if not self-monitoring, the statement
22457 + * below will have no effect because thread is stopped.
22458 + * The field is reset on ctxsw in.
22459 + */
22460 + new_set->duration_start = sched_clock();
22461 +
22462 + return 0;
22463 +}
22464 +
22465 +/**
22466 + * __pfm_stop - stop monitoring
22467 + * @ctx: context to operate on
22468 + * @release_info: infos for caller (see below)
22469 + *
22470 + * When operating in per-thread mode and when not self-monitoring,
22471 + * the monitored thread must be stopped.
22472 + *
22473 + * the context is locked and interrupts are disabled.
22474 + *
22475 + * release_info value upon return:
22476 + * - bit 0 : unused
22477 + * - bit 1 : when set, must cancel hrtimer
22478 + */
22479 +int __pfm_stop(struct pfm_context *ctx, int *release_info)
22480 +{
22481 + struct pfm_event_set *set;
22482 + struct task_struct *task;
22483 + u64 now;
22484 + int state;
22485 +
22486 + *release_info = 0;
22487 +
22488 + now = sched_clock();
22489 + state = ctx->state;
22490 + set = ctx->active_set;
22491 +
22492 + /*
22493 + * context must be attached (zombie cannot happen)
22494 + */
22495 + if (state == PFM_CTX_UNLOADED)
22496 + return -EINVAL;
22497 +
22498 + task = ctx->task;
22499 +
22500 + PFM_DBG("ctx_task=[%d] ctx_state=%d is_system=%d",
22501 + task ? task->pid : -1,
22502 + state,
22503 + !task);
22504 +
22505 + /*
22506 + * this happens for system-wide context
22507 + */
22508 + if (task == NULL)
22509 + task = current;
22510 +
22511 + /*
22512 + * compute elapsed time
22513 + *
22514 + * unless masked, compute elapsed duration, stop timeout
22515 + */
22516 + if (task == current && state == PFM_CTX_LOADED) {
22517 + /*
22518 + * timeout cancel must be deferred until context is
22519 + * unlocked to avoid race with pfm_handle_switch_timeout()
22520 + */
22521 + if (set->flags & PFM_SETFL_TIME_SWITCH)
22522 + *release_info |= 0x2;
22523 +
22524 + set->duration += now - set->duration_start;
22525 + }
22526 +
22527 + pfm_arch_stop(task, ctx);
22528 +
22529 + ctx->flags.started = 0;
22530 + /*
22531 + * starting now, in-flight PMU interrupt for this context
22532 + * are treated as spurious
22533 + */
22534 + return 0;
22535 +}
22536 diff --git a/perfmon/perfmon_attach.c b/perfmon/perfmon_attach.c
22537 new file mode 100644
22538 index 0000000..bbd1d1e
22539 --- /dev/null
22540 +++ b/perfmon/perfmon_attach.c
22541 @@ -0,0 +1,474 @@
22542 +/*
22543 + * perfmon_attach.c: perfmon2 load/unload functions
22544 + *
22545 + * This file implements the perfmon2 interface which
22546 + * provides access to the hardware performance counters
22547 + * of the host processor.
22548 + *
22549 + *
22550 + * The initial version of perfmon.c was written by
22551 + * Ganesh Venkitachalam, IBM Corp.
22552 + *
22553 + * Then it was modified for perfmon-1.x by Stephane Eranian and
22554 + * David Mosberger, Hewlett Packard Co.
22555 + *
22556 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
22557 + * by Stephane Eranian, Hewlett Packard Co.
22558 + *
22559 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
22560 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
22561 + * David Mosberger-Tang <davidm@hpl.hp.com>
22562 + *
22563 + * More information about perfmon available at:
22564 + * http://perfmon2.sf.net
22565 + *
22566 + * This program is free software; you can redistribute it and/or
22567 + * modify it under the terms of version 2 of the GNU General Public
22568 + * License as published by the Free Software Foundation.
22569 + *
22570 + * This program is distributed in the hope that it will be useful,
22571 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
22572 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22573 + * General Public License for more details.
22574 + *
22575 + * You should have received a copy of the GNU General Public License
22576 + * along with this program; if not, write to the Free Software
22577 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22578 + * 02111-1307 USA
22579 + */
22580 +#include <linux/kernel.h>
22581 +#include <linux/fs.h>
22582 +#include <linux/perfmon_kern.h>
22583 +#include "perfmon_priv.h"
22584 +
22585 +/**
22586 + * pfm_load_ctx_sys - attach context to a CPU in system-wide mode
22587 + * @ctx: context to operate on
22588 + * @set_id: set to activate first
22589 + * @cpu: CPU to monitor
22590 + *
22591 + * The cpu specified in the pfarg_load.load_pid argument must be the current
22592 + * CPU.
22593 + *
22594 + * The function must be called with the context locked and interrupts disabled.
22595 + */
22596 +static int pfm_load_ctx_sys(struct pfm_context *ctx, u16 set_id, u32 cpu)
22597 +{
22598 + struct pfm_event_set *set;
22599 + int mycpu;
22600 + int ret;
22601 +
22602 + mycpu = smp_processor_id();
22603 +
22604 + /*
22605 + * system-wide: check we are running on the desired CPU
22606 + */
22607 + if (cpu != mycpu) {
22608 + PFM_DBG("wrong CPU: asking %u but on %u", cpu, mycpu);
22609 + return -EINVAL;
22610 + }
22611 +
22612 + /*
22613 + * initialize sets
22614 + */
22615 + set = pfm_prepare_sets(ctx, set_id);
22616 + if (!set) {
22617 + PFM_DBG("event set%u does not exist", set_id);
22618 + return -EINVAL;
22619 + }
22620 +
22621 + PFM_DBG("set=%u set_flags=0x%x", set->id, set->flags);
22622 +
22623 + ctx->cpu = mycpu;
22624 + ctx->task = NULL;
22625 + ctx->active_set = set;
22626 +
22627 + /*
22628 + * perform any architecture specific actions
22629 + */
22630 + ret = pfm_arch_load_context(ctx);
22631 + if (ret)
22632 + goto error_noload;
22633 +
22634 + /*
22635 + * now reserve the session, before we can proceed with
22636 + * actually accessing the PMU hardware
22637 + */
22638 + ret = pfm_session_acquire(1, mycpu);
22639 + if (ret)
22640 + goto error;
22641 +
22642 +
22643 + /*
22644 + * caller must be on monitored CPU to access PMU, thus this is
22645 + * a form of self-monitoring
22646 + */
22647 + ctx->flags.is_self = 1;
22648 +
22649 + set->runs++;
22650 +
22651 + /*
22652 + * load PMD from set
22653 + * load PMC from set
22654 + */
22655 + pfm_arch_restore_pmds(ctx, set);
22656 + pfm_arch_restore_pmcs(ctx, set);
22657 +
22658 + /*
22659 + * set new ownership
22660 + */
22661 + pfm_set_pmu_owner(NULL, ctx);
22662 +
22663 + /*
22664 + * reset pending work
22665 + */
22666 + ctx->flags.work_type = PFM_WORK_NONE;
22667 + ctx->flags.reset_count = 0;
22668 +
22669 + /*
22670 + * reset message queue
22671 + */
22672 + ctx->msgq_head = ctx->msgq_tail = 0;
22673 +
22674 + ctx->state = PFM_CTX_LOADED;
22675 +
22676 + return 0;
22677 +error:
22678 + pfm_arch_unload_context(ctx);
22679 +error_noload:
22680 + return ret;
22681 +}
22682 +
22683 +/**
22684 + * pfm_load_ctx_thread - attach context to a thread
22685 + * @ctx: context to operate on
22686 + * @set_id: first set
22687 + * @task: thread to attach to
22688 + *
22689 + * The function must be called with the context locked and interrupts disabled.
22690 + */
22691 +static int pfm_load_ctx_thread(struct pfm_context *ctx, u16 set_id,
22692 + struct task_struct *task)
22693 +{
22694 + struct pfm_event_set *set;
22695 + struct pfm_context *old;
22696 + int ret;
22697 +
22698 + PFM_DBG("load_pid=%d set=%u", task->pid, set_id);
22699 + /*
22700 + * per-thread:
22701 + * - task to attach to is checked in sys_pfm_load_context() to avoid
22702 + * locking issues. if found, and not self, task refcount was
22703 + * incremented.
22704 + */
22705 + old = cmpxchg(&task->pfm_context, NULL, ctx);
22706 + if (old) {
22707 + PFM_DBG("load_pid=%d has a context "
22708 + "old=%p new=%p cur=%p",
22709 + task->pid,
22710 + old,
22711 + ctx,
22712 + task->pfm_context);
22713 + return -EEXIST;
22714 + }
22715 +
22716 + /*
22717 + * initialize sets, on failure undo the attachment made by cmpxchg()
22718 + */
22719 + set = pfm_prepare_sets(ctx, set_id);
22720 + if (!set) {
22721 + PFM_DBG("event set%u does not exist", set_id);
22722 + ret = -EINVAL;
22723 + goto error_noload;
22724 + }
22725 +
22726 + ctx->task = task;
22727 + ctx->cpu = -1;
22728 + ctx->active_set = set;
22729 +
22730 + /*
22731 + * perform any architecture specific actions
22732 + */
22733 + ret = pfm_arch_load_context(ctx);
22734 + if (ret)
22735 + goto error_noload;
22736 +
22737 + /*
22738 + * now reserve the session, before we can proceed with
22739 + * actually accessing the PMU hardware
22740 + */
22741 + ret = pfm_session_acquire(0, -1);
22742 + if (ret)
22743 + goto error;
22744 +
22745 +
22746 + set->runs++;
22747 + if (ctx->task != current) {
22748 +
22749 + ctx->flags.is_self = 0;
22750 +
22751 + /* force a full reload */
22752 + ctx->last_act = PFM_INVALID_ACTIVATION;
22753 + ctx->last_cpu = -1;
22754 + set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
22755 +
22756 + } else {
22757 + pfm_check_save_prev_ctx();
22758 +
22759 + ctx->last_cpu = smp_processor_id();
22760 + __get_cpu_var(pmu_activation_number)++;
22761 + ctx->last_act = __get_cpu_var(pmu_activation_number);
22762 +
22763 + ctx->flags.is_self = 1;
22764 +
22765 + /*
22766 + * load PMD from set
22767 + * load PMC from set
22768 + */
22769 + pfm_arch_restore_pmds(ctx, set);
22770 + pfm_arch_restore_pmcs(ctx, set);
22771 +
22772 + /*
22773 + * set new ownership
22774 + */
22775 + pfm_set_pmu_owner(ctx->task, ctx);
22776 + }
22777 + set_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
22778 +
22779 + /*
22780 + * reset pending work
22781 + */
22782 + ctx->flags.work_type = PFM_WORK_NONE;
22783 + ctx->flags.reset_count = 0;
22784 +
22785 + /*
22786 + * reset message queue
22787 + */
22788 + ctx->msgq_head = ctx->msgq_tail = 0;
22789 +
22790 + ctx->state = PFM_CTX_LOADED;
22791 +
22792 + return 0;
22793 +
22794 +error:
22795 + pfm_arch_unload_context(ctx);
22796 + ctx->task = NULL;
22797 +error_noload:
22798 + /*
22799 + * detach context: undo the cmpxchg() attachment done on entry
22800 + */
22801 + task->pfm_context = NULL;
22802 + return ret;
22803 +}
22804 +
22805 +/**
22806 + * __pfm_load_context - attach context to a CPU or thread
22807 + * @ctx: context to operate on
22808 + * @load: pfarg_load as passed by user
22809 + * @task: thread to attach to, NULL for system-wide
22810 + */
22811 +int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *load,
22812 + struct task_struct *task)
22813 +{
22814 + if (ctx->flags.system)
22815 + return pfm_load_ctx_sys(ctx, load->load_set, load->load_pid);
22816 + return pfm_load_ctx_thread(ctx, load->load_set, task);
22817 +}
22818 +
22819 +/**
22820 + * pfm_update_ovfl_pmds - account for pending ovfls on PMDs
22821 + * @ctx: context to operate on
22822 + *
22823 + * This function is always called after pfm_stop has been issued
22824 + */
22825 +static void pfm_update_ovfl_pmds(struct pfm_context *ctx)
22826 +{
22827 + struct pfm_event_set *set;
22828 + u64 *cnt_pmds;
22829 + u64 ovfl_mask;
22830 + u16 num_ovfls, i, first;
22831 +
22832 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
22833 + first = ctx->regs.first_intr_pmd;
22834 + cnt_pmds = ctx->regs.cnt_pmds;
22835 +
22836 + /*
22837 + * look for pending interrupts and adjust PMD values accordingly
22838 + */
22839 + list_for_each_entry(set, &ctx->set_list, list) {
22840 +
22841 + if (!set->npend_ovfls)
22842 + continue;
22843 +
22844 + num_ovfls = set->npend_ovfls;
22845 + PFM_DBG("set%u nintrs=%u", set->id, num_ovfls);
22846 +
22847 + for (i = first; num_ovfls; i++) {
22848 + if (test_bit(i, cast_ulp(set->povfl_pmds))) {
22849 + /* only correct value for counters */
22850 + if (test_bit(i, cast_ulp(cnt_pmds)))
22851 + set->pmds[i].value += 1 + ovfl_mask;
22852 + num_ovfls--;
22853 + }
22854 + PFM_DBG("pmd%u set=%u val=0x%llx",
22855 + i,
22856 + set->id,
22857 + (unsigned long long)set->pmds[i].value);
22858 + }
22859 + /*
22860 + * we need to clear to prevent a pfm_getinfo_evtsets() from
22861 + * returning stale data even after the context is unloaded
22862 + */
22863 + set->npend_ovfls = 0;
22864 + bitmap_zero(cast_ulp(set->povfl_pmds), ctx->regs.max_intr_pmd);
22865 + }
22866 +}
22867 +
22868 +
22869 +/**
22870 + * __pfm_unload_context - detach context from CPU or thread
22871 + * @ctx: context to operate on
22872 + * @release_info: pointer to return info (see below)
22873 + *
22874 + * The function must be called with the context locked and interrupts disabled.
22875 + *
22876 + * release_info value upon return:
22877 + * - bit 0: when set, must release the session
22878 + * - bit 1: when set, must cancel hrtimer
22879 + */
22880 +int __pfm_unload_context(struct pfm_context *ctx, int *release_info)
22881 +{
22882 + struct task_struct *task;
22883 + int ret;
22884 +
22885 + PFM_DBG("ctx_state=%d task [%d]",
22886 + ctx->state,
22887 + ctx->task ? ctx->task->pid : -1);
22888 +
22889 + *release_info = 0;
22890 +
22891 + /*
22892 + * unload only when necessary
22893 + */
22894 + if (ctx->state == PFM_CTX_UNLOADED)
22895 + return 0;
22896 +
22897 + task = ctx->task;
22898 +
22899 + /*
22900 + * stop monitoring
22901 + */
22902 + ret = __pfm_stop(ctx, release_info);
22903 + if (ret)
22904 + return ret;
22905 +
22906 + ctx->state = PFM_CTX_UNLOADED;
22907 + ctx->flags.can_restart = 0;
22908 +
22909 + /*
22910 + * save active set
22911 + * UP:
22912 + * if not current task and due to lazy, state may
22913 + * still be live
22914 + * for system-wide, guaranteed to run on correct CPU
22915 + */
22916 + if (__get_cpu_var(pmu_ctx) == ctx) {
22917 + /*
22918 + * pending overflows have been saved by pfm_stop()
22919 + */
22920 + pfm_save_pmds(ctx, ctx->active_set);
22921 + pfm_set_pmu_owner(NULL, NULL);
22922 + PFM_DBG("released ownership");
22923 + }
22924 +
22925 + /*
22926 + * account for pending overflows
22927 + */
22928 + pfm_update_ovfl_pmds(ctx);
22929 +
22930 + /*
22931 + * arch-specific unload operations
22932 + */
22933 + pfm_arch_unload_context(ctx);
22934 +
22935 + /*
22936 + * per-thread: disconnect from monitored task
22937 + */
22938 + if (task) {
22939 + task->pfm_context = NULL;
22940 + ctx->task = NULL;
22941 + clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
22942 + clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
22943 + pfm_arch_disarm_handle_work(task);
22944 + }
22945 + /*
22946 + * session can be freed, must have interrupts enabled
22947 + * thus we release in the caller. Bit 0 signals to the
22948 + * caller that the session can be released.
22949 + */
22950 + *release_info |= 0x1;
22951 +
22952 + return 0;
22953 +}
22954 +
22955 +/**
22956 + * __pfm_exit_thread - detach and free context on thread exit
22957 + */
22958 +void __pfm_exit_thread(void)
22959 +{
22960 + struct pfm_context *ctx;
22961 + unsigned long flags;
22962 + int free_ok = 0, release_info = 0;
22963 + int ret;
22964 +
22965 + ctx = current->pfm_context;
22966 +
22967 + BUG_ON(ctx->flags.system);
22968 +
22969 + spin_lock_irqsave(&ctx->lock, flags);
22970 +
22971 + PFM_DBG("state=%d is_self=%d", ctx->state, ctx->flags.is_self);
22972 +
22973 + /*
22974 + * __pfm_unload_context() cannot fail
22975 + * in the context states we are interested in
22976 + */
22977 + switch (ctx->state) {
22978 + case PFM_CTX_LOADED:
22979 + case PFM_CTX_MASKED:
22980 + __pfm_unload_context(ctx, &release_info);
22981 + /*
22982 + * end notification only sent for non
22983 + * self-monitoring context
22984 + */
22985 + if (!ctx->flags.is_self)
22986 + pfm_end_notify(ctx);
22987 + break;
22988 + case PFM_CTX_ZOMBIE:
22989 + __pfm_unload_context(ctx, &release_info);
22990 + free_ok = 1;
22991 + break;
22992 + default:
22993 + BUG_ON(ctx->state != PFM_CTX_LOADED);
22994 + break;
22995 + }
22996 + spin_unlock_irqrestore(&ctx->lock, flags);
22997 +
22998 + /*
22999 + * cancel timer now that context is unlocked
23000 + */
23001 + if (release_info & 0x2) {
23002 + ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
23003 + PFM_DBG("timeout cancel=%d", ret);
23004 + }
23005 +
23006 + if (release_info & 0x1)
23007 + pfm_session_release(0, 0);
23008 +
23009 + /*
23010 + * All memory free operations (especially for vmalloc'ed memory)
23011 + * MUST be done with interrupts ENABLED.
23012 + */
23013 + if (free_ok)
23014 + pfm_free_context(ctx);
23015 +}
23016 diff --git a/perfmon/perfmon_ctx.c b/perfmon/perfmon_ctx.c
23017 new file mode 100644
23018 index 0000000..afe6078
23019 --- /dev/null
23020 +++ b/perfmon/perfmon_ctx.c
23021 @@ -0,0 +1,314 @@
23022 +/*
23023 + * perfmon_ctx.c: perfmon2 context functions
23024 + *
23025 + * This file implements the perfmon2 interface which
23026 + * provides access to the hardware performance counters
23027 + * of the host processor.
23028 + *
23029 + *
23030 + * The initial version of perfmon.c was written by
23031 + * Ganesh Venkitachalam, IBM Corp.
23032 + *
23033 + * Then it was modified for perfmon-1.x by Stephane Eranian and
23034 + * David Mosberger, Hewlett Packard Co.
23035 + *
23036 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
23037 + * by Stephane Eranian, Hewlett Packard Co.
23038 + *
23039 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
23040 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23041 + * David Mosberger-Tang <davidm@hpl.hp.com>
23042 + *
23043 + * More information about perfmon available at:
23044 + * http://perfmon2.sf.net
23045 + *
23046 + * This program is free software; you can redistribute it and/or
23047 + * modify it under the terms of version 2 of the GNU General Public
23048 + * License as published by the Free Software Foundation.
23049 + *
23050 + * This program is distributed in the hope that it will be useful,
23051 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23052 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23053 + * General Public License for more details.
23054 + *
23055 + * You should have received a copy of the GNU General Public License
23056 + * along with this program; if not, write to the Free Software
23057 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23058 + * 02111-1307 USA
23059 + */
23060 +#include <linux/kernel.h>
23061 +#include <linux/fs.h>
23062 +#include <linux/perfmon_kern.h>
23063 +#include "perfmon_priv.h"
23064 +
23065 +/*
23066 + * context memory pool pointer
23067 + */
23068 +static struct kmem_cache *pfm_ctx_cachep;
23069 +
23070 +/**
23071 + * pfm_free_context - de-allocate context and associated resources
23072 + * @ctx: context to free
23073 + */
23074 +void pfm_free_context(struct pfm_context *ctx)
23075 +{
23076 + pfm_arch_context_free(ctx);
23077 +
23078 + pfm_free_sets(ctx);
23079 +
23080 + pfm_smpl_buf_free(ctx);
23081 +
23082 + PFM_DBG("free ctx @0x%p", ctx);
23083 + kmem_cache_free(pfm_ctx_cachep, ctx);
23084 + /*
23085 + * decrease refcount on:
23086 + * - PMU description table
23087 + * - sampling format
23088 + */
23089 + pfm_pmu_conf_put();
23090 + pfm_pmu_release();
23091 +}
23092 +
23093 +/**
23094 + * pfm_ctx_flags_sane - check if context flags passed by user are okay
23095 + * @ctx_flags: flags passed by user to pfm_create_context
23096 + *
23097 + * return:
23098 + * 0 if successful
23099 + * <0 and error code otherwise
23100 + */
23101 +static inline int pfm_ctx_flags_sane(u32 ctx_flags)
23102 +{
23103 + if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
23104 + if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
23105 + PFM_DBG("cannot use blocking mode in syswide mode");
23106 + return -EINVAL;
23107 + }
23108 + }
23109 + return 0;
23110 +}
23111 +
23112 +/**
23113 + * pfm_ctx_permissions - check authorization to create new context
23114 + * @ctx_flags: context flags passed by user
23115 + *
23116 + * check for permissions to create a context.
23117 + *
23118 + * A sysadmin may decide to restrict creation of per-thread
23119 + * and/or system-wide context to a group of users using the
23120 + * group id via /sys/kernel/perfmon/task_group and
23121 + * /sys/kernel/perfmon/sys_group.
23122 + *
23123 + * Once we identify a user level package which can be used
23124 + * to grant/revoke Linux capabilities at login via PAM, we will
23125 + * be able to use capabilities. We would also need to increase
23126 + * the size of cap_t to support more than 32 capabilities (it
23127 + * is currently defined as u32 and 32 capabilities are already
23128 + * defined).
23129 + */
23130 +static inline int pfm_ctx_permissions(u32 ctx_flags)
23131 +{
23132 + if ((ctx_flags & PFM_FL_SYSTEM_WIDE)
23133 + && pfm_controls.sys_group != PFM_GROUP_PERM_ANY
23134 + && !in_group_p(pfm_controls.sys_group)) {
23135 + PFM_DBG("user group not allowed to create a syswide ctx");
23136 + return -EPERM;
23137 + } else if (pfm_controls.task_group != PFM_GROUP_PERM_ANY
23138 + && !in_group_p(pfm_controls.task_group)) {
23139 + PFM_DBG("user group not allowed to create a task context");
23140 + return -EPERM;
23141 + }
23142 + return 0;
23143 +}
23144 +
23145 +/**
23146 + * __pfm_create_context - allocate and initialize a perfmon context
23147 + * @req : pfarg_ctx from user
23148 + * @fmt : pointer sampling format, NULL if not used
23149 + * @fmt_arg: pointer to argument to sampling format, NULL if not used
23150 + * @mode: PFM_NORMAL or PFM_COMPAT(IA-64 v2.0 compatibility)
23151 + * @new_ctx: address of new context upon successful return (may be NULL)
23152 + *
23153 + * function used to allocate a new context. A context is allocated along
23154 + * with the default event set. If a sampling format is used, the buffer
23155 + * may be allocated and initialized.
23156 + *
23157 + * The file descriptor identifying the context is allocated and returned
23158 + * to caller.
23159 + *
23160 + * This function operates with no locks and interrupts are enabled.
23161 + * return:
23162 + * >=0: the file descriptor to identify the context
23163 + * <0 : the error code
23164 + */
23165 +int __pfm_create_context(struct pfarg_ctx *req,
23166 + struct pfm_smpl_fmt *fmt,
23167 + void *fmt_arg,
23168 + int mode,
23169 + struct pfm_context **new_ctx)
23170 +{
23171 + struct pfm_context *ctx;
23172 + struct file *filp = NULL;
23173 + u32 ctx_flags;
23174 + int fd = 0, ret;
23175 +
23176 + ctx_flags = req->ctx_flags;
23177 +
23178 + /* Increase refcount on PMU description */
23179 + ret = pfm_pmu_conf_get(1);
23180 + if (ret < 0)
23181 + goto error_conf;
23182 +
23183 + ret = pfm_ctx_flags_sane(ctx_flags);
23184 + if (ret < 0)
23185 + goto error_alloc;
23186 +
23187 + ret = pfm_ctx_permissions(ctx_flags);
23188 + if (ret < 0)
23189 + goto error_alloc;
23190 +
23191 + /*
23192 + * we can use GFP_KERNEL and potentially sleep because we do
23193 + * not hold any lock at this point.
23194 + */
23195 + might_sleep();
23196 + ret = -ENOMEM;
23197 + ctx = kmem_cache_zalloc(pfm_ctx_cachep, GFP_KERNEL);
23198 + if (!ctx)
23199 + goto error_alloc;
23200 +
23201 + PFM_DBG("alloc ctx @0x%p", ctx);
23202 +
23203 + INIT_LIST_HEAD(&ctx->set_list);
23204 + spin_lock_init(&ctx->lock);
23205 + init_completion(&ctx->restart_complete);
23206 + init_waitqueue_head(&ctx->msgq_wait);
23207 +
23208 + /*
23209 + * context is unloaded
23210 + */
23211 + ctx->state = PFM_CTX_UNLOADED;
23212 +
23213 + /*
23214 + * initialization of context's flags
23215 + * must be done before pfm_find_set()
23216 + */
23217 + ctx->flags.block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
23218 + ctx->flags.system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
23219 + ctx->flags.no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
23220 + ctx->flags.ia64_v20_compat = mode == PFM_COMPAT ? 1 : 0;
23221 +
23222 + ret = pfm_pmu_acquire(ctx);
23223 + if (ret)
23224 + goto error_file;
23225 + /*
23226 + * check if PMU is usable: need at least one PMC and one PMD
23227 + */
23228 + if (!(ctx->regs.num_pmcs && ctx->regs.num_pmds)) {
23229 + PFM_DBG("no usable PMU registers");
23230 + ret = -EBUSY;
23231 + goto error_file;
23232 + }
23233 +
23234 + /*
23235 + * link to format, must be done first for correct
23236 + * error handling in pfm_free_context()
23237 + */
23238 + ctx->smpl_fmt = fmt;
23239 +
23240 + ret = -ENFILE;
23241 + fd = pfm_alloc_fd(&filp);
23242 + if (fd < 0)
23243 + goto error_file;
23244 +
23245 + /*
23246 + * initialize arch-specific section
23247 + * must be done before fmt_init()
23248 + */
23249 + ret = pfm_arch_context_create(ctx, ctx_flags);
23250 + if (ret)
23251 + goto error_set;
23252 +
23253 + ret = -ENOMEM;
23254 +
23255 + /*
23256 + * add initial set
23257 + */
23258 + if (pfm_create_initial_set(ctx))
23259 + goto error_set;
23260 +
23261 + /*
23262 + * does the user want to sample?
23263 + * must be done after pfm_pmu_acquire() because
23264 + * needs ctx->regs
23265 + */
23266 + if (fmt) {
23267 + ret = pfm_setup_smpl_fmt(ctx, ctx_flags, fmt_arg, filp);
23268 + if (ret)
23269 + goto error_set;
23270 + }
23271 +
23272 + filp->private_data = ctx;
23273 +
23274 + ctx->last_act = PFM_INVALID_ACTIVATION;
23275 + ctx->last_cpu = -1;
23276 +
23277 + /*
23278 + * initialize notification message queue
23279 + */
23280 + ctx->msgq_head = ctx->msgq_tail = 0;
23281 +
23282 + PFM_DBG("flags=0x%x system=%d notify_block=%d no_msg=%d"
23283 + " use_fmt=%d ctx_fd=%d mode=%d",
23284 + ctx_flags,
23285 + ctx->flags.system,
23286 + ctx->flags.block,
23287 + ctx->flags.no_msg,
23288 + !!fmt,
23289 + fd, mode);
23290 +
23291 + if (new_ctx)
23292 + *new_ctx = ctx;
23293 +
23294 + /*
23295 + * we defer the fd_install until we are certain the call succeeded
23296 + * to ensure we do not have to undo its effect. Neither put_filp()
23297 + * nor put_unused_fd() undoes the effect of fd_install().
23298 + */
23299 + fd_install(fd, filp);
23300 +
23301 + return fd;
23302 +
23303 +error_set:
23304 + put_filp(filp);
23305 + put_unused_fd(fd);
23306 +error_file:
23307 + /*
23308 + * calls the right *_put() functions
23309 + * calls pfm_pmu_release()
23310 + */
23311 + pfm_free_context(ctx);
23312 + return ret;
23313 +error_alloc:
23314 + pfm_pmu_conf_put();
23315 +error_conf:
23316 + pfm_smpl_fmt_put(fmt);
23317 + return ret;
23318 +}
23319 +
23320 +/**
23321 + * pfm_init_ctx -- initialize context SLAB
23322 + *
23323 + * called from pfm_init
23324 + */
23325 +int __init pfm_init_ctx(void)
23326 +{
23327 + pfm_ctx_cachep = kmem_cache_create("pfm_context",
23328 + sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE,
23329 + SLAB_HWCACHE_ALIGN, 0, NULL);
23330 + if (!pfm_ctx_cachep) {
23331 + PFM_ERR("cannot initialize context slab");
23332 + return -ENOMEM;
23333 + }
23334 + return 0;
23335 +}
23336 diff --git a/perfmon/perfmon_ctxsw.c b/perfmon/perfmon_ctxsw.c
23337 new file mode 100644
23338 index 0000000..9a28d13
23339 --- /dev/null
23340 +++ b/perfmon/perfmon_ctxsw.c
23341 @@ -0,0 +1,342 @@
23342 +/*
23343 + * perfmon_ctxsw.c: perfmon2 context switch code
23344 + *
23345 + * This file implements the perfmon2 interface which
23346 + * provides access to the hardware performance counters
23347 + * of the host processor.
23348 + *
23349 + * The initial version of perfmon.c was written by
23350 + * Ganesh Venkitachalam, IBM Corp.
23351 + *
23352 + * Then it was modified for perfmon-1.x by Stephane Eranian and
23353 + * David Mosberger, Hewlett Packard Co.
23354 + *
23355 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
23356 + * by Stephane Eranian, Hewlett Packard Co.
23357 + *
23358 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
23359 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23360 + * David Mosberger-Tang <davidm@hpl.hp.com>
23361 + *
23362 + * More information about perfmon available at:
23363 + * http://perfmon2.sf.net
23364 + *
23365 + * This program is free software; you can redistribute it and/or
23366 + * modify it under the terms of version 2 of the GNU General Public
23367 + * License as published by the Free Software Foundation.
23368 + *
23369 + * This program is distributed in the hope that it will be useful,
23370 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23371 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23372 + * General Public License for more details.
23373 + *
23374 + * You should have received a copy of the GNU General Public License
23375 + * along with this program; if not, write to the Free Software
23376 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23377 + * 02111-1307 USA
23378 + */
23379 +#include <linux/kernel.h>
23380 +#include <linux/perfmon_kern.h>
23381 +#include "perfmon_priv.h"
23382 +
23383 +void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
23384 +{
23385 + u64 val, ovfl_mask;
23386 + u64 *used_pmds, *cnt_pmds;
23387 + u16 i, num;
23388 +
23389 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
23390 + num = set->nused_pmds;
23391 + cnt_pmds = ctx->regs.cnt_pmds;
23392 + used_pmds = set->used_pmds;
23393 +
23394 + /*
23395 + * save HW PMD, for counters, reconstruct 64-bit value
23396 + */
23397 + for (i = 0; num; i++) {
23398 + if (test_bit(i, cast_ulp(used_pmds))) {
23399 + val = pfm_read_pmd(ctx, i);
23400 + if (likely(test_bit(i, cast_ulp(cnt_pmds))))
23401 + val = (set->pmds[i].value & ~ovfl_mask) |
23402 + (val & ovfl_mask);
23403 + set->pmds[i].value = val;
23404 + num--;
23405 + }
23406 + }
23407 + pfm_arch_clear_pmd_ovfl_cond(ctx, set);
23408 +}
23409 +
23410 +/*
23411 + * interrupts are disabled (no preemption)
23412 + */
23413 +void __pfm_ctxswin_thread(struct task_struct *task,
23414 + struct pfm_context *ctx, u64 now)
23415 +{
23416 + u64 cur_act;
23417 + struct pfm_event_set *set;
23418 + int reload_pmcs, reload_pmds;
23419 + int mycpu, is_active;
23420 +
23421 + mycpu = smp_processor_id();
23422 +
23423 + cur_act = __get_cpu_var(pmu_activation_number);
23424 + /*
23425 + * we need to lock context because it could be accessed
23426 + * from another CPU. Normally the schedule() functions
23427 + * has masked interrupts which should be enough to
23428 + * protect against PMU interrupts.
23429 + */
23430 + spin_lock(&ctx->lock);
23431 +
23432 + is_active = pfm_arch_is_active(ctx);
23433 +
23434 + set = ctx->active_set;
23435 +
23436 + /*
23437 + * in case of zombie, we do not complete ctxswin of the
23438 + * PMU, and we force a call to pfm_handle_work() to finish
23439 + * cleanup, i.e., free context + smpl_buff. The reason for
23440 + * deferring to pfm_handle_work() is that it is not possible
23441 + * to vfree() with interrupts disabled.
23442 + */
23443 + if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) {
23444 + pfm_post_work(task, ctx, PFM_WORK_ZOMBIE);
23445 + goto done;
23446 + }
23447 +
23448 + /*
23449 + * if we were the last user of the PMU on that CPU,
23450 + * then nothing to do except restore psr
23451 + */
23452 + if (ctx->last_cpu == mycpu && ctx->last_act == cur_act) {
23453 + /*
23454 + * check for forced reload conditions
23455 + */
23456 + reload_pmcs = set->priv_flags & PFM_SETFL_PRIV_MOD_PMCS;
23457 + reload_pmds = set->priv_flags & PFM_SETFL_PRIV_MOD_PMDS;
23458 + } else {
23459 +#ifndef CONFIG_SMP
23460 + pfm_check_save_prev_ctx();
23461 +#endif
23462 + reload_pmcs = 1;
23463 + reload_pmds = 1;
23464 + }
23465 + /* consumed */
23466 + set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
23467 +
23468 + if (reload_pmds)
23469 + pfm_arch_restore_pmds(ctx, set);
23470 +
23471 + /*
23472 + * need to check if had in-flight interrupt in
23473 + * pfm_ctxswout_thread(). If at least one bit set, then we must replay
23474 + * the interrupt to avoid losing some important performance data.
23475 + *
23476 + * npend_ovfls is cleared in interrupt handler
23477 + */
23478 + if (set->npend_ovfls) {
23479 + pfm_arch_resend_irq(ctx);
23480 + pfm_stats_inc(ovfl_intr_replay_count);
23481 + }
23482 +
23483 + if (reload_pmcs)
23484 + pfm_arch_restore_pmcs(ctx, set);
23485 +
23486 + /*
23487 + * record current activation for this context
23488 + */
23489 + __get_cpu_var(pmu_activation_number)++;
23490 + ctx->last_cpu = mycpu;
23491 + ctx->last_act = __get_cpu_var(pmu_activation_number);
23492 +
23493 + /*
23494 + * establish new ownership.
23495 + */
23496 + pfm_set_pmu_owner(task, ctx);
23497 +
23498 + pfm_arch_ctxswin_thread(task, ctx);
23499 + /*
23500 + * set->duration does not count when context in MASKED state.
23501 + * set->duration_start is reset in unmask_monitoring()
23502 + */
23503 + set->duration_start = now;
23504 +
23505 + /*
23506 + * re-arm switch timeout, if necessary
23507 + * Timeout is active only if monitoring is active,
23508 + * i.e., LOADED + started
23509 + *
23510 + * We reload the remainder timeout or the full timeout.
23511 + * Remainder is recorded on context switch out or in
23512 + * pfm_load_context()
23513 + */
23514 + if (ctx->state == PFM_CTX_LOADED
23515 + && (set->flags & PFM_SETFL_TIME_SWITCH) && is_active) {
23516 + pfm_restart_timer(ctx, set);
23517 + /* careful here as pfm_restart_timer may switch sets */
23518 + }
23519 +done:
23520 + spin_unlock(&ctx->lock);
23521 +}
23522 +
23523 +/*
23524 + * interrupts are masked, runqueue lock is held.
23525 + *
23526 + * In UP, we simply stop monitoring and leave the state
23527 + * in place, i.e., lazy save
23528 + */
23529 +void __pfm_ctxswout_thread(struct task_struct *task,
23530 + struct pfm_context *ctx, u64 now)
23531 +{
23532 + struct pfm_event_set *set;
23533 + int need_save_pmds, is_active;
23534 +
23535 + /*
23536 + * we need to lock context because it could be accessed
23537 + * from another CPU. Normally the schedule() functions
23538 + * has masked interrupts which should be enough to
23539 + * protect against PMU interrupts.
23540 + */
23541 +
23542 + spin_lock(&ctx->lock);
23543 +
23544 + is_active = pfm_arch_is_active(ctx);
23545 + set = ctx->active_set;
23546 +
23547 + /*
23548 + * stop monitoring and
23549 + * collect pending overflow information
23550 + * needed on ctxswin. We cannot afford to lose
23551 + * a PMU interrupt.
23552 + */
23553 + need_save_pmds = pfm_arch_ctxswout_thread(task, ctx);
23554 +
23555 + if (ctx->state == PFM_CTX_LOADED) {
23556 + /*
23557 + * accumulate only when set is actively monitoring,
23558 + */
23559 + set->duration += now - set->duration_start;
23560 +
23561 + /*
23562 + * record remaining timeout
23563 + * reload in pfm_ctxsw_in()
23564 + */
23565 + if (is_active && (set->flags & PFM_SETFL_TIME_SWITCH)) {
23566 + struct hrtimer *h = NULL;
23567 + h = &__get_cpu_var(pfm_hrtimer);
23568 + hrtimer_cancel(h);
23569 + set->hrtimer_rem = hrtimer_get_remaining(h);
23570 + PFM_DBG_ovfl("hrtimer=%lld",
23571 + (long long)set->hrtimer_rem.tv64);
23572 + }
23573 + }
23574 +
23575 +#ifdef CONFIG_SMP
23576 + /*
23577 + * in SMP, release ownership of this PMU.
23578 + * PMU interrupts are masked, so nothing
23579 + * can happen.
23580 + */
23581 + pfm_set_pmu_owner(NULL, NULL);
23582 +
23583 + /*
23584 + * On some architectures, it is necessary to read the
23585 + * PMD registers to check for pending overflow in
23586 + * pfm_arch_ctxswout_thread(). In that case, saving of
23587 + * the PMDs may be done there and not here.
23588 + */
23589 + if (need_save_pmds)
23590 + pfm_save_pmds(ctx, set);
23591 +#endif
23592 + spin_unlock(&ctx->lock);
23593 +}
23594 +
23595 +/*
23596 + *
23597 + */
23598 +static void __pfm_ctxswout_sys(struct task_struct *prev,
23599 + struct task_struct *next)
23600 +{
23601 + struct pfm_context *ctx;
23602 +
23603 + ctx = __get_cpu_var(pmu_ctx);
23604 + BUG_ON(!ctx);
23605 +
23606 + /*
23607 + * propagate TIF_PERFMON_CTXSW to ensure that:
23608 + * - previous task has TIF_PERFMON_CTXSW cleared, in case it is
23609 + * scheduled onto another CPU where there is syswide monitoring
23610 + * - next task has TIF_PERFMON_CTXSW set to ensure it will come back
23611 + * here when context switched out
23612 + */
23613 + clear_tsk_thread_flag(prev, TIF_PERFMON_CTXSW);
23614 + set_tsk_thread_flag(next, TIF_PERFMON_CTXSW);
23615 +
23616 + /*
23617 + * nothing to do until actually started
23618 + * XXX: assumes no means to start from user level
23619 + */
23620 + if (!ctx->flags.started)
23621 + return;
23622 +
23623 + pfm_arch_ctxswout_sys(prev, ctx);
23624 +}
23625 +
23626 +/*
23627 + *
23628 + */
23629 +static void __pfm_ctxswin_sys(struct task_struct *prev,
23630 + struct task_struct *next)
23631 +{
23632 + struct pfm_context *ctx;
23633 +
23634 + ctx = __get_cpu_var(pmu_ctx);
23635 + BUG_ON(!ctx);
23636 +
23637 + /*
23638 + * nothing to do until actually started
23639 + * XXX: assumes no means to start from user level
23640 + */
23641 + if (!ctx->flags.started)
23642 + return;
23643 +
23644 + pfm_arch_ctxswin_sys(next, ctx);
23645 +}
23646 +
23647 +void pfm_ctxsw_out(struct task_struct *prev,
23648 + struct task_struct *next)
23649 +{
23650 + struct pfm_context *ctxp;
23651 + u64 now;
23652 +
23653 + now = sched_clock();
23654 +
23655 + ctxp = prev->pfm_context;
23656 +
23657 + if (ctxp)
23658 + __pfm_ctxswout_thread(prev, ctxp, now);
23659 + else
23660 + __pfm_ctxswout_sys(prev, next);
23661 +
23662 + pfm_stats_inc(ctxswout_count);
23663 + pfm_stats_add(ctxswout_ns, sched_clock() - now);
23664 +}
23665 +
23666 +void pfm_ctxsw_in(struct task_struct *prev,
23667 + struct task_struct *next)
23668 +{
23669 + struct pfm_context *ctxn;
23670 + u64 now;
23671 +
23672 + now = sched_clock();
23673 +
23674 + ctxn = next->pfm_context;
23675 +
23676 + if (ctxn)
23677 + __pfm_ctxswin_thread(next, ctxn, now);
23678 + else
23679 + __pfm_ctxswin_sys(prev, next);
23680 +
23681 + pfm_stats_inc(ctxswin_count);
23682 + pfm_stats_add(ctxswin_ns, sched_clock() - now);
23683 +}
23684 diff --git a/perfmon/perfmon_debugfs.c b/perfmon/perfmon_debugfs.c
23685 new file mode 100644
23686 index 0000000..e4d2fad
23687 --- /dev/null
23688 +++ b/perfmon/perfmon_debugfs.c
23689 @@ -0,0 +1,168 @@
23690 +/*
23691 + * perfmon_debugfs.c: perfmon2 statistics interface to debugfs
23692 + *
23693 + * This file implements the perfmon2 interface which
23694 + * provides access to the hardware performance counters
23695 + * of the host processor.
23696 + *
23697 + * The initial version of perfmon.c was written by
23698 + * Ganesh Venkitachalam, IBM Corp.
23699 + *
23700 + * Then it was modified for perfmon-1.x by Stephane Eranian and
23701 + * David Mosberger, Hewlett Packard Co.
23702 + *
23703 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
23704 + * by Stephane Eranian, Hewlett Packard Co.
23705 + *
23706 + * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
23707 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23708 + *
23709 + * More information about perfmon available at:
23710 + * http://perfmon2.sf.net
23711 + *
23712 + * This program is free software; you can redistribute it and/or
23713 + * modify it under the terms of version 2 of the GNU General Public
23714 + * License as published by the Free Software Foundation.
23715 + *
23716 + * This program is distributed in the hope that it will be useful,
23717 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23718 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23719 + * General Public License for more details.
23720 + *
23721 + * You should have received a copy of the GNU General Public License
23722 + * along with this program; if not, write to the Free Software
23723 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23724 + * 02111-1307 USA
23725 + */
23726 +#include <linux/kernel.h>
23727 +#include <linux/debugfs.h>
23728 +#include <linux/perfmon_kern.h>
23729 +
23730 +/*
23731 + * to make the statistics visible to user space:
23732 + * $ mount -t debugfs none /mnt
23733 + * $ cd /mnt/perfmon
23734 + * then choose a CPU subdir
23735 + */
23736 +DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
23737 +
23738 +static struct dentry *pfm_debugfs_dir;
23739 +
23740 +void pfm_reset_stats(int cpu)
23741 +{
23742 + struct pfm_stats *st;
23743 + unsigned long flags;
23744 +
23745 + st = &per_cpu(pfm_stats, cpu); /* statistics of the requested CPU */
23746 +
23747 + local_irq_save(flags); /* masks interrupts on the calling CPU only */
23748 + memset(st->v, 0, sizeof(st->v)); /* zero every counter value */
23749 + local_irq_restore(flags);
23750 +}
23751 +
23752 +static const char *pfm_stats_strs[] = { /* debugfs file name for each st->v[i] */
23753 + "ovfl_intr_all_count",
23754 + "ovfl_intr_ns",
23755 + "ovfl_intr_spurious_count",
23756 + "ovfl_intr_replay_count",
23757 + "ovfl_intr_regular_count",
23758 + "handle_work_count",
23759 + "ovfl_notify_count",
23760 + "reset_pmds_count",
23761 + "pfm_restart_count",
23762 + "fmt_handler_calls",
23763 + "fmt_handler_ns",
23764 + "set_switch_count",
23765 + "set_switch_ns",
23766 + "set_switch_exp",
23767 + "ctxswin_count",
23768 + "ctxswin_ns",
23769 + "handle_timeout_count",
23770 + "ovfl_intr_nmi_count",
23771 + "ctxswout_count",
23772 + "ctxswout_ns",
23773 +};
23774 +#define PFM_NUM_STRS ARRAY_SIZE(pfm_stats_strs) /* checked against PFM_NUM_STATS */
23775 +
23776 +void pfm_debugfs_del_cpu(int cpu)
23777 +{
23778 + struct pfm_stats *st;
23779 + int i;
23780 +
23781 + st = &per_cpu(pfm_stats, cpu);
23782 +
23783 + for (i = 0; i < PFM_NUM_STATS; i++) { /* per-statistic files first */
23784 + if (st->dirs[i])
23785 + debugfs_remove(st->dirs[i]);
23786 + st->dirs[i] = NULL; /* makes a second call harmless */
23787 + }
23788 + if (st->cpu_dir) /* then the cpuN directory itself */
23789 + debugfs_remove(st->cpu_dir);
23790 + st->cpu_dir = NULL;
23791 +}
23792 +
23793 +int pfm_debugfs_add_cpu(int cpu)
23794 +{
23795 + struct pfm_stats *st;
23796 + int i;
23797 +
23798 + /*
23799 + * sanity check between stats names and the number
23800 + * of entries in the pfm_stats value array.
23801 + */
23802 + if (PFM_NUM_STRS != PFM_NUM_STATS) {
23803 + PFM_ERR("PFM_NUM_STRS != PFM_NUM_STATS error");
23804 + return -1;
23805 + }
23806 +
23807 + st = &per_cpu(pfm_stats, cpu);
23808 + sprintf(st->cpu_name, "cpu%d", cpu);
23809 +
23810 + st->cpu_dir = debugfs_create_dir(st->cpu_name, pfm_debugfs_dir);
23811 + if (!st->cpu_dir)
23812 + return -1;
23813 +
23814 + for (i = 0; i < PFM_NUM_STATS; i++) {
23815 + st->dirs[i] = debugfs_create_u64(pfm_stats_strs[i],
23816 + S_IRUGO,
23817 + st->cpu_dir,
23818 + &st->v[i]);
23819 + if (!st->dirs[i])
23820 + goto error;
23821 + }
23822 + pfm_reset_stats(cpu);
23823 + return 0;
23824 +error:
23825 + /*
23826 + * undo the partial setup and clear the dentry pointers so that a later
23827 + * pfm_debugfs_del_cpu() does not remove them twice (unset ones are NULL)
23828 + */
23829 + pfm_debugfs_del_cpu(cpu);
23830 + return -1;
23831 +}
23832 +
23833 +/* called once from pfm_init(); on failure nothing is left behind in debugfs */
23834 +int __init pfm_init_debugfs(void)
23835 +{
23836 + int cpu1, cpu2, ret;
23837 +
23838 + pfm_debugfs_dir = debugfs_create_dir("perfmon", NULL);
23839 + if (!pfm_debugfs_dir)
23840 + return -1;
23841 +
23842 + for_each_online_cpu(cpu1) {
23843 + ret = pfm_debugfs_add_cpu(cpu1);
23844 + if (ret)
23845 + goto error;
23846 + }
23847 + return 0;
23848 +error:
23849 + for_each_online_cpu(cpu2) { /* undo the CPUs added before cpu1 */
23850 + if (cpu2 == cpu1)
23851 + break;
23852 + pfm_debugfs_del_cpu(cpu2);
23853 + }
23854 + debugfs_remove(pfm_debugfs_dir); /* now empty: drop "perfmon" as well */
23855 + pfm_debugfs_dir = NULL;
23856 + return -1;
23857 +}
23858 diff --git a/perfmon/perfmon_dfl_smpl.c b/perfmon/perfmon_dfl_smpl.c
23859 new file mode 100644
23860 index 0000000..8c83489
23861 --- /dev/null
23862 +++ b/perfmon/perfmon_dfl_smpl.c
23863 @@ -0,0 +1,298 @@
23864 +/*
23865 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
23866 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
23867 + *
23868 + * This file implements the new default sampling buffer format
23869 + * for the perfmon2 subsystem.
23870 + *
23871 + * This program is free software; you can redistribute it and/or
23872 + * modify it under the terms of version 2 of the GNU General Public
23873 + * License as published by the Free Software Foundation.
23874 + *
23875 + * This program is distributed in the hope that it will be useful,
23876 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
23877 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23878 + * General Public License for more details.
23879 + *
23880 + * You should have received a copy of the GNU General Public License
23881 + * along with this program; if not, write to the Free Software
23882 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
23883 + * 02111-1307 USA
23884 + */
23885 +#include <linux/kernel.h>
23886 +#include <linux/types.h>
23887 +#include <linux/module.h>
23888 +#include <linux/init.h>
23889 +#include <linux/smp.h>
23890 +
23891 +#include <linux/perfmon_kern.h>
23892 +#include <linux/perfmon_dfl_smpl.h>
23893 +
23894 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
23895 +MODULE_DESCRIPTION("new perfmon default sampling format");
23896 +MODULE_LICENSE("GPL");
23897 +
23898 +static int pfm_dfl_fmt_validate(u32 ctx_flags, u16 npmds, void *data)
23899 +{
23900 + struct pfm_dfl_smpl_arg *arg = data;
23901 + u64 min_buf_size;
23902 +
23903 + if (data == NULL) {
23904 + PFM_DBG("no argument passed");
23905 + return -EINVAL;
23906 + }
23907 +
23908 + /*
23909 + * sanity check in case size_t is smaller than u64 (32-bit kernels)
23910 + */
23911 +#if BITS_PER_LONG == 32
23912 +#define MAX_SIZE_T (1ULL<<(sizeof(size_t)<<3))
23913 + if (sizeof(size_t) < sizeof(arg->buf_size)) {
23914 + if (arg->buf_size >= MAX_SIZE_T)
23915 + return -E2BIG;
23916 + }
23917 +#endif
23918 +
23919 + /*
23920 + * compute min buf size. npmds is the maximum number
23921 + * of implemented PMD registers.
23922 + */
23923 + min_buf_size = sizeof(struct pfm_dfl_smpl_hdr)
23924 + + (sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64)));
23925 +
23926 + PFM_DBG("validate ctx_flags=0x%x flags=0x%x npmds=%u "
23927 + "min_buf_size=%llu buf_size=%llu\n",
23928 + ctx_flags,
23929 + arg->buf_flags,
23930 + npmds,
23931 + (unsigned long long)min_buf_size,
23932 + (unsigned long long)arg->buf_size);
23933 +
23934 + /*
23935 + * must hold at least the buffer header + one minimally sized entry
23936 + */
23937 + if (arg->buf_size < min_buf_size)
23938 + return -EINVAL;
23939 +
23940 + return 0;
23941 +}
23942 +
23943 +static int pfm_dfl_fmt_get_size(u32 flags, void *data, size_t *size)
23944 +{
23945 + struct pfm_dfl_smpl_arg *arg = data;
23946 +
23947 + /*
23948 + * size is expected to have been checked in pfm_dfl_fmt_validate()
23949 + * so that the cast to size_t does not lose bits from buf_size.
23950 + */
23951 + *size = (size_t)arg->buf_size;
23952 +
23953 + return 0;
23954 +}
23955 +
23956 +static int pfm_dfl_fmt_init(struct pfm_context *ctx, void *buf, u32 ctx_flags,
23957 + u16 npmds, void *data)
23958 +{
23959 + struct pfm_dfl_smpl_hdr *hdr;
23960 + struct pfm_dfl_smpl_arg *arg = data;
23961 +
23962 + hdr = buf; /* the header sits at the very start of the buffer */
23963 +
23964 + hdr->hdr_version = PFM_DFL_SMPL_VERSION;
23965 + hdr->hdr_buf_size = arg->buf_size;
23966 + hdr->hdr_buf_flags = arg->buf_flags;
23967 + hdr->hdr_cur_offs = sizeof(*hdr); /* first entry goes right after the header */
23968 + hdr->hdr_overflows = 0;
23969 + hdr->hdr_count = 0;
23970 + hdr->hdr_min_buf_space = sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64));
23971 + /*
23972 + * due to cache aliasing, it may be necessary to flush the cache
23973 + * on certain architectures (e.g., MIPS)
23974 + */
23975 + pfm_cacheflush(hdr, sizeof(*hdr));
23976 +
23977 + PFM_DBG("buffer=%p buf_size=%llu hdr_size=%zu hdr_version=%u.%u "
23978 + "min_space=%llu npmds=%u",
23979 + buf,
23980 + (unsigned long long)hdr->hdr_buf_size,
23981 + sizeof(*hdr),
23982 + PFM_VERSION_MAJOR(hdr->hdr_version),
23983 + PFM_VERSION_MINOR(hdr->hdr_version),
23984 + (unsigned long long)hdr->hdr_min_buf_space,
23985 + npmds);
23986 +
23987 + return 0;
23988 +}
23989 +
23990 +/*
23991 + * called from pfm_overflow_handler() to record a new sample
23992 + * returns 0 if another sample still fits afterwards, -ENOBUFS otherwise
23993 + * context is locked, interrupts are disabled (no preemption)
23994 + */
23995 +static int pfm_dfl_fmt_handler(struct pfm_context *ctx,
23996 + unsigned long ip, u64 tstamp, void *data)
23997 +{
23998 + struct pfm_dfl_smpl_hdr *hdr;
23999 + struct pfm_dfl_smpl_entry *ent;
24000 + struct pfm_ovfl_arg *arg;
24001 + void *cur, *last;
24002 + u64 *e;
24003 + size_t entry_size, min_size;
24004 + u16 npmds, i;
24005 + u16 ovfl_pmd;
24006 + void *buf;
24007 +
24008 + hdr = ctx->smpl_addr;
24009 + arg = &ctx->ovfl_arg;
24010 +
24011 + buf = hdr;
24012 + cur = buf+hdr->hdr_cur_offs; /* where the next entry is written */
24013 + last = buf+hdr->hdr_buf_size; /* one byte past the end of the buffer */
24014 + ovfl_pmd = arg->ovfl_pmd;
24015 + min_size = hdr->hdr_min_buf_space;
24016 +
24017 + /*
24018 + * precheck for sanity: need hdr_min_buf_space free bytes
24019 + */
24020 + if ((last - cur) < min_size)
24021 + goto full;
24022 +
24023 + npmds = arg->num_smpl_pmds;
24024 +
24025 + ent = (struct pfm_dfl_smpl_entry *)cur;
24026 +
24027 + entry_size = sizeof(*ent) + (npmds << 3); /* 8 bytes per recorded PMD */
24028 +
24029 + /* position for first pmd */
24030 + e = (u64 *)(ent+1);
24031 +
24032 + hdr->hdr_count++;
24033 +
24034 + PFM_DBG_ovfl("count=%llu cur=%p last=%p free_bytes=%zu ovfl_pmd=%d "
24035 + "npmds=%u",
24036 + (unsigned long long)hdr->hdr_count,
24037 + cur, last,
24038 + (last-cur),
24039 + ovfl_pmd,
24040 + npmds);
24041 +
24042 + /*
24043 + * current = task running at the time of the overflow.
24044 + *
24045 + * per-task mode:
24046 + * - this is usually the task being monitored.
24047 + * Under certain conditions, it might be a different task
24048 + *
24049 + * system-wide:
24050 + * - this is not necessarily the task controlling the session
24051 + */
24052 + ent->pid = current->pid;
24053 + ent->ovfl_pmd = ovfl_pmd;
24054 + ent->last_reset_val = arg->pmd_last_reset;
24055 +
24056 + /*
24057 + * where did the fault happen (includes slot number)
24058 + */
24059 + ent->ip = ip;
24060 +
24061 + ent->tstamp = tstamp;
24062 + ent->cpu = smp_processor_id();
24063 + ent->set = arg->active_set;
24064 + ent->tgid = current->tgid;
24065 +
24066 + /*
24067 + * copy the npmds sampled PMD values right after the fixed entry
24068 + */
24069 + if (npmds) {
24070 + u64 *val = arg->smpl_pmds_values;
24071 + for (i = 0; i < npmds; i++)
24072 + *e++ = *val++;
24073 + }
24074 +
24075 + /*
24076 + * update position for next entry
24077 + */
24078 + hdr->hdr_cur_offs += entry_size;
24079 + cur += entry_size;
24080 +
24081 + pfm_cacheflush(hdr, sizeof(*hdr));
24082 + pfm_cacheflush(ent, entry_size);
24083 +
24084 + /*
24085 + * post check to avoid losing the last sample
24086 + */
24087 + if ((last - cur) < min_size)
24088 + goto full;
24089 +
24090 + /* reset before returning from interrupt handler */
24091 + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
24092 +
24093 + return 0;
24094 +full:
24095 + PFM_DBG_ovfl("sampling buffer full free=%zu, count=%llu",
24096 + last-cur,
24097 + (unsigned long long)hdr->hdr_count);
24098 +
24099 + /*
24100 + * increment number of buffer overflows.
24101 + * important to detect duplicate set of samples.
24102 + */
24103 + hdr->hdr_overflows++;
24104 +
24105 + /*
24106 + * request notification and masking of monitoring.
24107 + * Notification is still subject to the overflowed
24108 + * register having the FL_NOTIFY flag set.
24109 + */
24110 + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
24111 +
24112 + return -ENOBUFS; /* we are full, sorry */
24113 +}
24114 +
24115 +static int pfm_dfl_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
24116 +{
24117 + struct pfm_dfl_smpl_hdr *hdr;
24118 +
24119 + hdr = buf;
24120 +
24121 + hdr->hdr_count = 0; /* buffer is considered empty again */
24122 + hdr->hdr_cur_offs = sizeof(*hdr); /* next entry goes right after the header */
24123 +
24124 + pfm_cacheflush(hdr, sizeof(*hdr));
24125 +
24126 + *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
24127 +
24128 + return 0;
24129 +}
24130 +
24131 +static int pfm_dfl_fmt_exit(void *buf)
24132 +{
24133 + return 0; /* this format has nothing to undo */
24134 +}
24135 +
24136 +static struct pfm_smpl_fmt dfl_fmt = { /* registered by pfm_dfl_fmt_init_module() */
24137 + .fmt_name = "default",
24138 + .fmt_version = 0x10000,
24139 + .fmt_arg_size = sizeof(struct pfm_dfl_smpl_arg),
24140 + .fmt_validate = pfm_dfl_fmt_validate,
24141 + .fmt_getsize = pfm_dfl_fmt_get_size,
24142 + .fmt_init = pfm_dfl_fmt_init,
24143 + .fmt_handler = pfm_dfl_fmt_handler,
24144 + .fmt_restart = pfm_dfl_fmt_restart,
24145 + .fmt_exit = pfm_dfl_fmt_exit,
24146 + .fmt_flags = PFM_FMT_BUILTIN_FLAG,
24147 + .owner = THIS_MODULE
24148 +};
24149 +
24150 +static int __init pfm_dfl_fmt_init_module(void)
24151 +{
24152 + return pfm_fmt_register(&dfl_fmt); /* result becomes the module init status */
24153 +}
24154 +
24155 +static void __exit pfm_dfl_fmt_cleanup_module(void)
24156 +{
24157 + pfm_fmt_unregister(&dfl_fmt); /* undoes the registration done at init */
24158 +}
24159 +
24160 +module_init(pfm_dfl_fmt_init_module);
24161 +module_exit(pfm_dfl_fmt_cleanup_module);
24162 diff --git a/perfmon/perfmon_file.c b/perfmon/perfmon_file.c
24163 new file mode 100644
24164 index 0000000..1cde81b
24165 --- /dev/null
24166 +++ b/perfmon/perfmon_file.c
24167 @@ -0,0 +1,751 @@
24168 +/*
24169 + * perfmon_file.c: perfmon2 file input/output functions
24170 + *
24171 + * This file implements the perfmon2 interface which
24172 + * provides access to the hardware performance counters
24173 + * of the host processor.
24174 + *
24175 + * The initial version of perfmon.c was written by
24176 + * Ganesh Venkitachalam, IBM Corp.
24177 + *
24178 + * Then it was modified for perfmon-1.x by Stephane Eranian and
24179 + * David Mosberger, Hewlett Packard Co.
24180 + *
24181 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
24182 + * by Stephane Eranian, Hewlett Packard Co.
24183 + *
24184 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
24185 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
24186 + * David Mosberger-Tang <davidm@hpl.hp.com>
24187 + *
24188 + * More information about perfmon available at:
24189 + * http://perfmon2.sf.net
24190 + *
24191 + * This program is free software; you can redistribute it and/or
24192 + * modify it under the terms of version 2 of the GNU General Public
24193 + * License as published by the Free Software Foundation.
24194 + *
24195 + * This program is distributed in the hope that it will be useful,
24196 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24197 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24198 + * General Public License for more details.
24199 + *
24200 + * You should have received a copy of the GNU General Public License
24201 + * along with this program; if not, write to the Free Software
24202 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24203 + * 02111-1307 USA
24204 + */
24205 +#include <linux/kernel.h>
24206 +#include <linux/module.h>
24207 +#include <linux/file.h>
24208 +#include <linux/poll.h>
24209 +#include <linux/vfs.h>
24210 +#include <linux/pagemap.h>
24211 +#include <linux/mount.h>
24212 +#include <linux/perfmon_kern.h>
24213 +#include "perfmon_priv.h"
24214 +
24215 +#define PFMFS_MAGIC 0xa0b4d889 /* perfmon filesystem magic number */
24216 +
24217 +struct pfm_controls pfm_controls = { /* initial values of the global controls */
24218 + .sys_group = PFM_GROUP_PERM_ANY,
24219 + .task_group = PFM_GROUP_PERM_ANY,
24220 + .arg_mem_max = PAGE_SIZE,
24221 + .smpl_buffer_mem_max = ~0, /* NOTE(review): presumably "no limit" - confirm */
24222 +};
24223 +EXPORT_SYMBOL(pfm_controls);
24224 +
24225 +static int __init enable_debug(char *str) /* handler for "perfmon_debug"; str is unused */
24226 +{
24227 + pfm_controls.debug = 1;
24228 + PFM_INFO("debug output enabled\n");
24229 + return 1;
24230 +}
24231 +__setup("perfmon_debug", enable_debug);
24232 +
24233 +static int pfmfs_delete_dentry(struct dentry *dentry)
24234 +{
24235 + return 1; /* per the VFS d_delete contract: do not keep unused dentries cached */
24236 +}
24237 +
24238 +static struct dentry_operations pfmfs_dentry_operations = {
24239 + .d_delete = pfmfs_delete_dentry,
24240 +};
24241 +
24242 +int pfm_buf_map_pagefault(struct vm_area_struct *vma, struct vm_fault *vmf)
24243 +{
24244 + void *kaddr;
24245 + unsigned long address;
24246 + struct pfm_context *ctx;
24247 + size_t size;
24248 +
24249 + address = (unsigned long)vmf->virtual_address;
24250 +
24251 + ctx = vma->vm_private_data; /* set by pfm_mmap_buffer() */
24252 + if (ctx == NULL) {
24253 + PFM_DBG("no ctx");
24254 + return VM_FAULT_SIGBUS;
24255 + }
24256 + /*
24257 + * size available to user (may be different from real_smpl_size)
24258 + */
24259 + size = ctx->smpl_size;
24260 +
24261 + if ((address < vma->vm_start) ||
24262 + (address >= (vma->vm_start + size)))
24263 + return VM_FAULT_SIGBUS;
24264 +
24265 + kaddr = ctx->smpl_addr + (address - vma->vm_start); /* same offset in the kernel buffer */
24266 +
24267 + vmf->page = vmalloc_to_page(kaddr);
24268 + get_page(vmf->page); /* reference held on the page handed back */
24269 +
24270 + PFM_DBG("[%d] start=%p ref_count=%d",
24271 + current->pid,
24272 + kaddr, page_count(vmf->page));
24273 +
24274 + return 0;
24275 +}
24276 +
24277 +/*
24278 + * we need to determine whether or not we are closing the last reference
24279 + * to the file and thus are going to end up in pfm_close() which eventually
24280 + * calls pfm_release_buf_space(). In that function, we update the accounting
24281 + * for locked_vm given that we are actually freeing the sampling buffer. The
24282 + * issue is that there are multiple paths leading to pfm_release_buf_space(),
24283 + * from exit(), munmap(), close(). The path coming from munmap() is problematic
24284 + * because do_munmap() grabs mmap_sem in write-mode which is also what
24285 + * pfm_release_buf_space does. To avoid deadlock, we need to determine where
24286 + * we are calling from and skip the locking. The vm_ops->close() callback
24287 + * is invoked for each remove_vma() independently of the number of references
24288 + * left on the file descriptor, therefore a simple reference counter does not
24289 + * work. We need to determine if this is the last call, and then set a flag
24290 + * to skip the locking.
24291 + */
24292 +static void pfm_buf_map_close(struct vm_area_struct *vma)
24293 +{
24294 + struct file *file;
24295 + struct pfm_context *ctx;
24296 +
24297 + file = vma->vm_file;
24298 + ctx = vma->vm_private_data;
24299 +
24300 + /*
24301 + * if file is going to close, then pfm_close() will
24302 + * be called, do not lock in pfm_release_buf
24303 + */
24304 + if (atomic_read(&file->f_count) == 1)
24305 + ctx->flags.mmap_nlock = 1;
24306 +}
24307 +
24308 +/*
24309 + * the close callback does not free anything: the locked
24310 + * memory accounting must be done when the actual buffer
24311 + * is freed. Munmap does not free the pages backing the vma
24312 + * because they may still be in use by the PMU interrupt handler.
24313 + */
24314 +struct vm_operations_struct pfm_buf_map_vm_ops = {
24315 + .fault = pfm_buf_map_pagefault,
24316 + .close = pfm_buf_map_close
24317 +};
24318 +
24319 +static int pfm_mmap_buffer(struct pfm_context *ctx, struct vm_area_struct *vma,
24320 + size_t size)
24321 +{
24322 + if (ctx->smpl_addr == NULL) {
24323 + PFM_DBG("no sampling buffer to map");
24324 + return -EINVAL;
24325 + }
24326 +
24327 + if (size > ctx->smpl_size) { /* mapping exactly smpl_size bytes is allowed */
24328 + PFM_DBG("mmap size=%zu > actual buf size=%zu",
24329 + size,
24330 + ctx->smpl_size);
24331 + return -EINVAL;
24332 + }
24333 +
24334 + vma->vm_ops = &pfm_buf_map_vm_ops; /* pages are provided by the fault handler */
24335 + vma->vm_private_data = ctx; /* read back by the vm_ops callbacks */
24336 +
24337 + return 0;
24338 +}
24339 +
24340 +static int pfm_mmap(struct file *file, struct vm_area_struct *vma)
24341 +{
24342 + size_t size;
24343 + struct pfm_context *ctx;
24344 + unsigned long flags;
24345 + int ret;
24346 +
24347 + PFM_DBG("pfm_file_ops");
24348 +
24349 + ctx = file->private_data;
24350 + size = (vma->vm_end - vma->vm_start); /* length of the requested mapping */
24351 +
24352 + if (ctx == NULL)
24353 + return -EINVAL;
24354 +
24355 + ret = -EINVAL; /* returned when a writable mapping is requested */
24356 +
24357 + spin_lock_irqsave(&ctx->lock, flags);
24358 +
24359 + if (vma->vm_flags & VM_WRITE) {
24360 + PFM_DBG("cannot map buffer for writing");
24361 + goto done;
24362 + }
24363 +
24364 + PFM_DBG("vm_pgoff=%lu size=%zu vm_start=0x%lx",
24365 + vma->vm_pgoff,
24366 + size,
24367 + vma->vm_start);
24368 +
24369 + ret = pfm_mmap_buffer(ctx, vma, size);
24370 + if (ret == 0)
24371 + vma->vm_flags |= VM_RESERVED;
24372 +
24373 + PFM_DBG("ret=%d vma_flags=0x%lx vma_start=0x%lx vma_size=%lu",
24374 + ret,
24375 + vma->vm_flags,
24376 + vma->vm_start,
24377 + vma->vm_end-vma->vm_start);
24378 +done:
24379 + spin_unlock_irqrestore(&ctx->lock, flags);
24380 +
24381 + return ret;
24382 +}
24383 +
24384 +/*
24385 + * Extract one message from queue.
24386 + *
24387 + * return:
24388 + * -EAGAIN: when non-blocking and nothing is in the queue.
24389 + * -ERESTARTSYS: when blocking and signal is pending
24390 + * Otherwise returns size of message (sizeof(pfarg_msg))
24391 + */
24392 +ssize_t __pfm_read(struct pfm_context *ctx, union pfarg_msg *msg_buf, int non_block)
24393 +{
24394 + ssize_t ret = 0;
24395 + unsigned long flags;
24396 + DECLARE_WAITQUEUE(wait, current);
24397 +
24398 + /*
24399 + * we must mask interrupts to avoid a race condition
24400 + * with the PMU interrupt handler.
24401 + */
24402 + spin_lock_irqsave(&ctx->lock, flags);
24403 +
24404 + while (pfm_msgq_is_empty(ctx)) {
24405 +
24406 + /*
24407 + * handle non-blocking reads
24408 + * return -EAGAIN
24409 + */
24410 + ret = -EAGAIN;
24411 + if (non_block)
24412 + break;
24413 +
24414 + add_wait_queue(&ctx->msgq_wait, &wait);
24415 + set_current_state(TASK_INTERRUPTIBLE);
24416 +
24417 + spin_unlock_irqrestore(&ctx->lock, flags);
24418 +
24419 + schedule();
24420 +
24421 + /*
24422 + * during this window, another thread may call
24423 + * pfm_read() and steal our message
24424 + */
24425 +
24426 + spin_lock_irqsave(&ctx->lock, flags);
24427 +
24428 + remove_wait_queue(&ctx->msgq_wait, &wait);
24429 + set_current_state(TASK_RUNNING);
24430 +
24431 + /*
24432 + * check for pending signals
24433 + * return -ERESTARTSYS
24434 + */
24435 + ret = -ERESTARTSYS;
24436 + if (signal_pending(current))
24437 + break;
24438 +
24439 + /*
24440 + * we may have a message: the loop condition rechecks the queue
24441 + */
24442 + ret = 0;
24443 + }
24444 +
24445 + /*
24446 + * extract message
24447 + */
24448 + if (ret == 0) {
24449 + /*
24450 + * copy the oldest message into msg_buf.
24451 + * We cannot directly call copy_to_user()
24452 + * because interrupts are masked. This is done
24453 + * in the caller
24454 + */
24455 + pfm_get_next_msg(ctx, msg_buf);
24456 +
24457 + ret = sizeof(*msg_buf);
24458 +
24459 + PFM_DBG("extracted type=%d", msg_buf->type);
24460 + }
24461 +
24462 + spin_unlock_irqrestore(&ctx->lock, flags);
24463 +
24464 + PFM_DBG("blocking=%d ret=%zd", non_block, ret); /* value printed is non_block */
24465 +
24466 + return ret;
24467 +}
24468 +
24469 +static ssize_t pfm_read(struct file *filp, char __user *buf, size_t size,
24470 + loff_t *ppos)
24471 +{
24472 + struct pfm_context *ctx;
24473 + union pfarg_msg msg_buf;
24474 + int non_block, ret;
24475 +
24476 + PFM_DBG_ovfl("buf=%p size=%zu", buf, size);
24477 +
24478 + ctx = filp->private_data;
24479 + if (ctx == NULL) {
24480 + PFM_ERR("no ctx for pfm_read");
24481 + return -EINVAL;
24482 + }
24483 +
24484 + non_block = filp->f_flags & O_NONBLOCK;
24485 +
24486 +#ifdef CONFIG_IA64_PERFMON_COMPAT
24487 + /*
24488 + * detect IA-64 v2.0 context read (message size is different)
24489 + * compiled out on all other configurations
24490 + */
24491 + if (unlikely(ctx->flags.ia64_v20_compat))
24492 + return pfm_arch_compat_read(ctx, buf, non_block, size);
24493 +#endif
24494 + /*
24495 + * cannot extract partial messages.
24496 + * check even when there is no message
24497 + *
24498 + * cannot extract more than one message per call. Bytes
24499 + * above sizeof(msg) are ignored.
24500 + */
24501 + if (size < sizeof(msg_buf)) {
24502 + PFM_DBG("message is too small size=%zu must be >=%zu)",
24503 + size,
24504 + sizeof(msg_buf));
24505 + return -EINVAL;
24506 + }
24507 +
24508 + ret = __pfm_read(ctx, &msg_buf, non_block);
24509 + if (ret > 0) { /* a message was extracted into the local copy */
24510 + if (copy_to_user(buf, &msg_buf, sizeof(msg_buf)))
24511 + ret = -EFAULT;
24512 + }
24513 + PFM_DBG_ovfl("ret=%d", ret);
24514 + return ret;
24515 +}
24516 +
24517 +static ssize_t pfm_write(struct file *file, const char __user *ubuf,
24518 + size_t size, loff_t *ppos)
24519 +{
24520 + PFM_DBG("pfm_write called");
24521 + return -EINVAL; /* writing is never accepted on this file */
24522 +}
24523 +
24524 +static unsigned int pfm_poll(struct file *filp, poll_table *wait)
24525 +{
24526 + struct pfm_context *ctx;
24527 + unsigned long flags;
24528 + unsigned int mask = 0;
24529 +
24530 + PFM_DBG("pfm_file_ops");
24531 +
24532 + if (filp->f_op != &pfm_file_ops) { /* not a perfmon file */
24533 + PFM_ERR("pfm_poll bad magic");
24534 + return 0;
24535 + }
24536 +
24537 + ctx = filp->private_data;
24538 + if (ctx == NULL) {
24539 + PFM_ERR("pfm_poll no ctx");
24540 + return 0;
24541 + }
24542 +
24543 + PFM_DBG("before poll_wait");
24544 +
24545 + poll_wait(filp, &ctx->msgq_wait, wait);
24546 +
24547 + /*
24548 + * pfm_msgq_is_empty() is non-atomic, hence the lock below
24549 + *
24550 + * filp is protected by fget() at upper level
24551 + * context cannot be closed by another thread.
24552 + *
24553 + * There may be a race with a PMU interrupt adding
24554 + * messages to the queue. But we are interested in
24555 + * queue not empty, so adding more messages should
24556 + * not really be a problem.
24557 + *
24558 + * There may be a race with another thread issuing
24559 + * a read() and stealing messages from the queue thus
24560 + * may return the wrong answer. This could potentially
24561 + * lead to a blocking read, because nothing is
24562 + * available in the queue
24563 + */
24564 + spin_lock_irqsave(&ctx->lock, flags);
24565 +
24566 + if (!pfm_msgq_is_empty(ctx))
24567 + mask = POLLIN | POLLRDNORM;
24568 +
24569 + spin_unlock_irqrestore(&ctx->lock, flags);
24570 +
24571 + PFM_DBG("after poll_wait mask=0x%x", mask);
24572 +
24573 + return mask;
24574 +}
24575 +
24576 +static int pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
24577 + unsigned long arg)
24578 +{
24579 + PFM_DBG("pfm_ioctl called");
24580 + return -EINVAL; /* every ioctl command is rejected */
24581 +}
24582 +
24583 +/*
24584 + * interrupts must not be masked when entering this function
24585 + */
24586 +static inline int __pfm_fasync(int fd, struct file *filp,
24587 + struct pfm_context *ctx, int on)
24588 +{
24589 + int ret;
24590 +
24591 + PFM_DBG("in fd=%d on=%d async_q=%p",
24592 + fd,
24593 + on,
24594 + ctx->async_queue);
24595 +
24596 + ret = fasync_helper(fd, filp, on, &ctx->async_queue); /* updates ctx->async_queue */
24597 +
24598 + PFM_DBG("out fd=%d on=%d async_q=%p ret=%d",
24599 + fd,
24600 + on,
24601 + ctx->async_queue, ret);
24602 +
24603 + return ret;
24604 +}
24605 +
24606 +static int pfm_fasync(int fd, struct file *filp, int on)
24607 +{
24608 + struct pfm_context *ctx;
24609 + int ret;
24610 +
24611 + PFM_DBG("pfm_file_ops");
24612 +
24613 + ctx = filp->private_data;
24614 + if (ctx == NULL) {
24615 + PFM_ERR("pfm_fasync no ctx");
24616 + return -EBADF;
24617 + }
24618 +
24619 + /*
24620 + * we cannot mask interrupts during this call because this
24621 + * may go to sleep if memory is not readily available.
24622 + *
24623 + * We are protected from the context disappearing by the
24624 + * get_fd()/put_fd() done in caller. Serialization of this function
24625 + * is ensured by caller.
24626 + */
24627 + ret = __pfm_fasync(fd, filp, ctx, on);
24628 +
24629 + PFM_DBG("pfm_fasync called on fd=%d on=%d async_queue=%p ret=%d",
24630 + fd,
24631 + on,
24632 + ctx->async_queue, ret);
24633 +
24634 + return ret;
24635 +}
24636 +
24637 +#ifdef CONFIG_SMP
24638 +static void __pfm_close_remote_cpu(void *info)
24639 +{
24640 + struct pfm_context *ctx = info;
24641 + int can_release;
24642 +
24643 + BUG_ON(ctx != __get_cpu_var(pmu_ctx)); /* must run on the CPU owning ctx */
24644 +
24645 + /*
24646 + * we are in IPI interrupt handler which has always higher
24647 + * priority than PMU interrupt, therefore we do not need to
24648 + * mask interrupts. context locking is not needed because we
24649 + * are in close(), no more user references.
24650 + *
24651 + * can_release is ignored, release done on calling CPU
24652 + */
24653 + __pfm_unload_context(ctx, &can_release);
24654 +
24655 + /*
24656 + * we cannot free context here because we are in_interrupt().
24657 + * we free on the calling CPU
24658 + */
24659 +}
24660 +
24661 +static int pfm_close_remote_cpu(u32 cpu, struct pfm_context *ctx)
24662 +{
24663 + BUG_ON(irqs_disabled()); /* caller must have interrupts enabled */
24664 + return smp_call_function_single(cpu, __pfm_close_remote_cpu, ctx, 1); /* 1: wait for completion */
24665 +}
24666 +#endif /* CONFIG_SMP */
24667 +
24668 +/*
24669 + * called either on explicit close() or from exit_files().
24670 + * Only the LAST user of the file gets to this point, i.e., it is
24671 + * called only ONCE.
24672 + *
24673 + * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
24674 + * (fput()), i.e., last task to access the file. Nobody else can access the
24675 + * file at this point.
24676 + *
24677 + * When called from exit_files(), the VMA has been freed because exit_mm()
24678 + * is executed before exit_files().
24679 + *
24680 + * When called from exit_files(), the current task is not yet ZOMBIE but we
24681 + * flush the PMU state to the context.
24682 + */
24683 +int __pfm_close(struct pfm_context *ctx, struct file *filp)
24684 +{
24685 + unsigned long flags;
24686 + int state;
24687 + int can_free = 1, can_unload = 1;
24688 + int is_system, can_release = 0;
24689 + u32 cpu;
24690 +
24691 + /*
24692 + * no risk of ctx or filp disappearing so we can operate outside
24693 + * of spin_lock(). fasync_helper() runs with interrupts masked,
24694 + * thus there is no risk with the PMU interrupt handler
24695 + *
24696 + * In case of zombie, we will not have the async struct anymore
24697 + * thus kill_fasync() will not do anything
24698 + *
24699 + * fd is not used when removing the entry so we pass -1
24700 + */
24701 + if (filp->f_flags & FASYNC)
24702 + __pfm_fasync (-1, filp, ctx, 0);
24703 +
24704 + spin_lock_irqsave(&ctx->lock, flags);
24705 +
24706 + state = ctx->state;
24707 + is_system = ctx->flags.system;
24708 + cpu = ctx->cpu;
24709 +
24710 + PFM_DBG("state=%d", state);
24711 +
24712 + /*
24713 + * check if unload is needed
24714 + */
24715 + if (state == PFM_CTX_UNLOADED)
24716 + goto doit;
24717 +
24718 +#ifdef CONFIG_SMP
24719 + /*
24720 + * we need to release the resource on the ORIGINAL cpu.
24721 + * we need to release the context lock to avoid deadlocks
24722 + * on the original CPU, especially in the context switch
24723 + * routines. It is safe to unlock because we are in close(),
24724 + * in other words, there is no more access from user level.
24725 + * we can also unmask interrupts on this CPU because the
24726 + * context is running on the original CPU. Context will be
24727 + * unloaded and the session will be released on the original
24728 + * CPU. Upon return, the caller is guaranteed that the context
24729 + * is gone from original CPU.
24730 + */
24731 + if (is_system && cpu != smp_processor_id()) {
24732 + spin_unlock_irqrestore(&ctx->lock, flags);
24733 + pfm_close_remote_cpu(cpu, ctx);
24734 + can_release = 1;
24735 + goto free_it;
24736 + }
24737 +
24738 + if (!is_system && ctx->task != current) {
24739 + /*
24740 + * switch context to zombie state
24741 + */
24742 + ctx->state = PFM_CTX_ZOMBIE;
24743 +
24744 + PFM_DBG("zombie ctx for [%d]", ctx->task->pid);
24745 + /*
24746 + * must check if other thread is using block overflow
24747 + * notification mode. If so make sure it will not block
24748 + * because there will not be any pfm_restart() issued.
24749 + * When the thread notices the ZOMBIE state, it will clean
24750 + * up what is left of the context
24751 + */
24752 + if (state == PFM_CTX_MASKED && ctx->flags.block) {
24753 + /*
24754 + * force task to wake up from MASKED state
24755 + */
24756 + PFM_DBG("waking up [%d]", ctx->task->pid);
24757 +
24758 + complete(&ctx->restart_complete);
24759 + }
24760 + /*
24761 + * PMU session will be released by monitored task when it notices
24762 + * ZOMBIE state as part of pfm_unload_context()
24763 + */
24764 + can_unload = can_free = 0;
24765 + }
24766 +#endif
24767 + if (can_unload)
24768 + __pfm_unload_context(ctx, &can_release);
24769 +doit:
24770 + spin_unlock_irqrestore(&ctx->lock, flags);
24771 +
24772 +#ifdef CONFIG_SMP
24773 +free_it:
24774 +#endif
24775 + if (can_release)
24776 + pfm_session_release(is_system, cpu);
24777 +
24778 + if (can_free)
24779 + pfm_free_context(ctx);
24780 +
24781 + return 0;
24782 +}
24783 +
24784 +static int pfm_close(struct inode *inode, struct file *filp)
24785 +{
24786 + struct pfm_context *ctx; /* installed as .release in pfm_file_ops */
24787 +
24788 + PFM_DBG("called filp=%p", filp);
24789 +
24790 + ctx = filp->private_data; /* perfmon context attached to this file */
24791 + if (ctx == NULL) {
24792 + PFM_ERR("no ctx");
24793 + return -EBADF;
24794 + }
24795 + return __pfm_close(ctx, filp); /* the actual teardown happens there */
24796 +}
24797 +
24798 +static int pfm_no_open(struct inode *irrelevant, struct file *dontcare)
24799 +{
24800 + PFM_DBG("pfm_file_ops");
24801 +
24802 + return -ENXIO; /* always refuse: used as .open in pfm_file_ops, both arguments ignored */
24803 +}
24804 +
24805 +
24806 +const struct file_operations pfm_file_ops = { /* installed on files built by pfm_alloc_fd() */
24807 + .llseek = no_llseek,
24808 + .read = pfm_read,
24809 + .write = pfm_write,
24810 + .poll = pfm_poll,
24811 + .ioctl = pfm_ioctl,
24812 + .open = pfm_no_open, /* special open to disallow open via /proc */
24813 + .fasync = pfm_fasync,
24814 + .release = pfm_close, /* forwards to __pfm_close() */
24815 + .mmap = pfm_mmap
24816 +};
24817 +
24818 +static int pfmfs_get_sb(struct file_system_type *fs_type,
24819 + int flags, const char *dev_name,
24820 + void *data, struct vfsmount *mnt)
24821 +{
24822 + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); /* flags, dev_name and data are unused */
24823 +}
24824 +
24825 +static struct file_system_type pfm_fs_type = { /* registered and mounted by pfm_init_fs() */
24826 + .name = "pfmfs",
24827 + .get_sb = pfmfs_get_sb,
24828 + .kill_sb = kill_anon_super,
24829 +};
24830 +
24831 +/*
24832 + * pfmfs should _never_ be mounted by userland - too much of a security
24833 + * hassle, no real gain from having the whole thing mounted. So we don't need
24834 + * any operations on the root directory. However, we need a non-trivial
24835 + * d_name - pfm: will go nicely and kill the special-casing in procfs.
24836 + */
24837 +static struct vfsmount *pfmfs_mnt; /* kernel-internal mount, set by pfm_init_fs() */
24838 +
24839 +int __init pfm_init_fs(void)
24840 +{
24841 + int err = register_filesystem(&pfm_fs_type); /* 0 or the error returned to the caller */
24842 + if (!err) {
24843 + pfmfs_mnt = kern_mount(&pfm_fs_type);
24844 + err = PTR_ERR(pfmfs_mnt); /* only meaningful if IS_ERR(); reset to 0 below otherwise */
24845 + if (IS_ERR(pfmfs_mnt))
24846 + unregister_filesystem(&pfm_fs_type); /* undo the registration on mount failure */
24847 + else
24848 + err = 0;
24849 + }
24850 + return err;
24851 +}
24852 +
24853 +/*
24854 + * Allocate an unused fd and a file backed by a fresh pfmfs inode. Returns the
24855 + * fd with the file in *cfile, or a negative error with all allocations undone.
24856 + */
24857 +int pfm_alloc_fd(struct file **cfile)
24858 +{
24859 + int fd, ret = 0;
24860 + struct file *file = NULL;
24861 + struct inode * inode;
24862 + char name[32];
24863 + struct qstr this;
24864 +
24865 + fd = get_unused_fd();
24866 + if (fd < 0)
24867 + return -ENFILE;
24868 +
24869 + ret = -ENFILE;
24870 + file = get_empty_filp();
24871 + if (!file)
24872 + goto out;
24873 +
24874 + /* allocate a new inode */
24875 + inode = new_inode(pfmfs_mnt->mnt_sb);
24876 + if (!inode)
24877 + goto out;
24878 +
24879 + PFM_DBG("new inode ino=%ld @%p", inode->i_ino, inode);
24880 +
24881 + inode->i_sb = pfmfs_mnt->mnt_sb;
24882 + inode->i_mode = S_IFCHR|S_IRUGO;
24883 + inode->i_uid = current->fsuid;
24884 + inode->i_gid = current->fsgid;
24885 +
24886 + sprintf(name, "[%lu]", inode->i_ino);
24887 + this.name = name;
24888 + this.hash = inode->i_ino;
24889 + this.len = strlen(name);
24890 +
24891 + ret = -ENOMEM;
24892 + /* allocate a new dcache entry */
24893 + file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
24894 + if (!file->f_dentry) {
24895 + iput(inode); /* not attached to a dentry yet: would leak otherwise */
24896 + goto out;
24897 + }
24898 +
24899 + file->f_dentry->d_op = &pfmfs_dentry_operations;
24900 +
24901 + d_add(file->f_dentry, inode); /* from here on the dentry owns the inode */
24902 + file->f_vfsmnt = mntget(pfmfs_mnt);
24903 + file->f_mapping = inode->i_mapping;
24904 +
24905 + file->f_op = &pfm_file_ops;
24906 + file->f_mode = FMODE_READ;
24907 + file->f_flags = O_RDONLY;
24908 + file->f_pos = 0;
24909 +
24910 + *cfile = file;
24911 +
24912 + return fd;
24913 +out:
24914 + if (file)
24915 + put_filp(file);
24916 + put_unused_fd(fd);
24917 + return ret;
24918 +}
24919 diff --git a/perfmon/perfmon_fmt.c b/perfmon/perfmon_fmt.c
24920 new file mode 100644
24921 index 0000000..27c4340
24922 --- /dev/null
24923 +++ b/perfmon/perfmon_fmt.c
24924 @@ -0,0 +1,219 @@
24925 +/*
24926 + * perfmon_fmt.c: perfmon2 sampling buffer format management
24927 + *
24928 + * This file implements the perfmon2 interface which
24929 + * provides access to the hardware performance counters
24930 + * of the host processor.
24931 + *
24932 + * The initial version of perfmon.c was written by
24933 + * Ganesh Venkitachalam, IBM Corp.
24934 + *
24935 + * Then it was modified for perfmon-1.x by Stephane Eranian and
24936 + * David Mosberger, Hewlett Packard Co.
24937 + *
24938 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
24939 + * by Stephane Eranian, Hewlett Packard Co.
24940 + *
24941 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
24942 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
24943 + * David Mosberger-Tang <davidm@hpl.hp.com>
24944 + *
24945 + * More information about perfmon available at:
24946 + * http://perfmon2.sf.net
24947 + *
24948 + * This program is free software; you can redistribute it and/or
24949 + * modify it under the terms of version 2 of the GNU General Public
24950 + * License as published by the Free Software Foundation.
24951 + *
24952 + * This program is distributed in the hope that it will be useful,
24953 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
24954 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24955 + * General Public License for more details.
24956 + *
24957 + * You should have received a copy of the GNU General Public License
24958 + * along with this program; if not, write to the Free Software
24959 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24960 + * 02111-1307 USA
24961 + */
24962 +#include <linux/module.h>
24963 +#include <linux/perfmon_kern.h>
24964 +#include "perfmon_priv.h"
24965 +
24966 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_smpl_fmt_lock); /* held around lookups and updates of pfm_smpl_fmt_list */
24967 +static LIST_HEAD(pfm_smpl_fmt_list); /* registered sampling formats, linked via fmt_list */
24968 +
24969 +static inline int fmt_is_mod(struct pfm_smpl_fmt *f)
24970 +{
24971 + return !(f->fmt_flags & PFM_FMTFL_IS_BUILTIN); /* non-zero unless the format is flagged builtin */
24972 +}
24973 +
24974 +static struct pfm_smpl_fmt *pfm_find_fmt(char *name)
24975 +{
24976 + struct pfm_smpl_fmt *entry; /* callers in this file hold pfm_smpl_fmt_lock */
24977 +
24978 + list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) {
24979 + if (!strcmp(entry->fmt_name, name)) /* exact name match */
24980 + return entry;
24981 + }
24982 + return NULL; /* no registered format carries that name */
24983 +}
24984 +/*
24985 + * find a buffer format based on its name; NULL if unknown or module is going
24986 + */
24987 +struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name)
24988 +{
24989 + struct pfm_smpl_fmt *fmt;
24990 +
24991 + spin_lock(&pfm_smpl_fmt_lock);
24992 +
24993 + fmt = pfm_find_fmt(name);
24994 +
24995 + /*
24996 + * increase module refcount (module formats only); fail if we cannot
24997 + */
24998 + if (fmt && fmt_is_mod(fmt) && !try_module_get(fmt->owner))
24999 + fmt = NULL;
25000 +
25001 + spin_unlock(&pfm_smpl_fmt_lock);
25002 +
25003 + return fmt; /* pair with pfm_smpl_fmt_put() */
25004 +}
25005 +
25006 +void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt)
25007 +{
25008 + if (fmt == NULL || !fmt_is_mod(fmt)) /* builtin formats hold no module reference */
25009 + return;
25010 + BUG_ON(fmt->owner == NULL);
25011 +
25012 + spin_lock(&pfm_smpl_fmt_lock);
25013 + module_put(fmt->owner); /* drops the reference taken in pfm_smpl_fmt_get() */
25014 + spin_unlock(&pfm_smpl_fmt_lock);
25015 +}
25016 +
25017 +int pfm_fmt_register(struct pfm_smpl_fmt *fmt)
25018 +{
25019 + int ret = 0; /* 0 on success, negative error code otherwise */
25020 +
25021 + if (perfmon_disabled) {
25022 + PFM_INFO("perfmon disabled, cannot add sampling format");
25023 + return -ENOSYS;
25024 + }
25025 +
25026 + /* some sanity checks, all done before taking the list lock */
25027 + if (fmt == NULL) {
25028 + PFM_INFO("perfmon: NULL format for register");
25029 + return -EINVAL;
25030 + }
25031 +
25032 + if (fmt->fmt_name == NULL) {
25033 + PFM_INFO("perfmon: format has no name");
25034 + return -EINVAL;
25035 + }
25036 +
25037 + if (fmt->fmt_qdepth > PFM_MSGS_COUNT) {
25038 + PFM_INFO("perfmon: format %s requires %u msg queue depth (max %d)",
25039 + fmt->fmt_name,
25040 + fmt->fmt_qdepth,
25041 + PFM_MSGS_COUNT);
25042 + return -EINVAL;
25043 + }
25044 +
25045 + /*
25046 + * a format not flagged PFM_FMTFL_IS_BUILTIN must provide
25047 + * .owner = THIS_MODULE; a NULL owner is only valid for builtin formats
25048 + */
25049 + if (fmt->owner == NULL && fmt_is_mod(fmt)) {
25050 + PFM_INFO("format %s has no module owner", fmt->fmt_name);
25051 + return -EINVAL;
25052 + }
25053 + /*
25054 + * we need at least a handler
25055 + */
25056 + if (fmt->fmt_handler == NULL) {
25057 + PFM_INFO("format %s has no handler", fmt->fmt_name);
25058 + return -EINVAL;
25059 + }
25060 +
25061 + /*
25062 + * format argument size cannot be bigger than PAGE_SIZE
25063 + */
25064 + if (fmt->fmt_arg_size > PAGE_SIZE) {
25065 + PFM_INFO("format %s arguments too big", fmt->fmt_name);
25066 + return -EINVAL;
25067 + }
25068 +
25069 + spin_lock(&pfm_smpl_fmt_lock);
25070 +
25071 + /*
25072 + * because of sysfs, we cannot have two formats with the same name
25073 + */
25074 + if (pfm_find_fmt(fmt->fmt_name)) {
25075 + PFM_INFO("format %s already registered", fmt->fmt_name);
25076 + ret = -EBUSY;
25077 + goto out;
25078 + }
25079 +
25080 + ret = pfm_sysfs_add_fmt(fmt); /* NOTE(review): called with the spinlock held -- confirm it cannot sleep */
25081 + if (ret) {
25082 + PFM_INFO("sysfs cannot add format entry for %s", fmt->fmt_name);
25083 + goto out;
25084 + }
25085 +
25086 + list_add(&fmt->fmt_list, &pfm_smpl_fmt_list); /* only linked once sysfs accepted it */
25087 +
25088 + PFM_INFO("added sampling format %s", fmt->fmt_name);
25089 +out:
25090 + spin_unlock(&pfm_smpl_fmt_lock);
25091 +
25092 + return ret;
25093 +}
25094 +EXPORT_SYMBOL(pfm_fmt_register);
25095 +
25096 +/*
25097 + * Unregister a sampling format: unlink it and remove its sysfs entry.
25098 + * Returns 0 on success, -EINVAL if fmt is invalid or is not registered.
25099 + */
25100 +int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt)
25101 +{
25102 + struct pfm_smpl_fmt *fmt2;
25103 + int ret = 0;
25104 +
25105 + if (!fmt || !fmt->fmt_name) {
25106 + PFM_DBG("invalid fmt");
25107 + return -EINVAL;
25108 + }
25109 +
25110 + spin_lock(&pfm_smpl_fmt_lock);
25111 +
25112 + fmt2 = pfm_find_fmt(fmt->fmt_name);
25113 + if (fmt2 != fmt) { /* not found, or another object owns that name */
25114 + PFM_INFO("unregister failed, format not registered");
25115 + ret = -EINVAL;
25116 + goto out;
25117 + }
25118 + list_del_init(&fmt->fmt_list); /* safe: fmt is the entry on the list */
25119 + pfm_sysfs_remove_fmt(fmt);
25120 + PFM_INFO("removed sampling format: %s", fmt->fmt_name);
25121 +out:
25122 + spin_unlock(&pfm_smpl_fmt_lock);
25123 + return ret;
25124 +}
25125 +EXPORT_SYMBOL(pfm_fmt_unregister);
25126 +
25127 +/*
25128 + * we defer adding the builtin formats to /sys/kernel/perfmon/formats
25129 + * until after the pfm sysfs subsystem is initialized. This function
25130 + * is called from pfm_init_sysfs()
25131 + */
25132 +void __init pfm_sysfs_builtin_fmt_add(void)
25133 +{
25134 + struct pfm_smpl_fmt *entry;
25135 +
25136 + /*
25137 + * locking not needed, kernel not fully booted
25138 + * when called
25139 + */
25140 + list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) { /* every format on the list at this point */
25141 + pfm_sysfs_add_fmt(entry); /* return value deliberately not checked here */
25142 + }
25143 +}
25144 diff --git a/perfmon/perfmon_hotplug.c b/perfmon/perfmon_hotplug.c
25145 new file mode 100644
25146 index 0000000..eaaba81
25147 --- /dev/null
25148 +++ b/perfmon/perfmon_hotplug.c
25149 @@ -0,0 +1,151 @@
25150 +/*
25151 + * perfmon_hotplug.c: handling of CPU hotplug
25152 + *
25153 + * The initial version of perfmon.c was written by
25154 + * Ganesh Venkitachalam, IBM Corp.
25155 + *
25156 + * Then it was modified for perfmon-1.x by Stephane Eranian and
25157 + * David Mosberger, Hewlett Packard Co.
25158 + *
25159 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
25160 + * by Stephane Eranian, Hewlett Packard Co.
25161 + *
25162 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
25163 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
25164 + * David Mosberger-Tang <davidm@hpl.hp.com>
25165 + *
25166 + * More information about perfmon available at:
25167 + * http://perfmon2.sf.net
25168 + *
25169 + * This program is free software; you can redistribute it and/or
25170 + * modify it under the terms of version 2 of the GNU General Public
25171 + * License as published by the Free Software Foundation.
25172 + *
25173 + * This program is distributed in the hope that it will be useful,
25174 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25175 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25176 + * General Public License for more details.
25177 + *
25178 + * You should have received a copy of the GNU General Public License
25179 + * along with this program; if not, write to the Free Software
25180 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25181 + * 02111-1307 USA
25182 + */
25183 +#include <linux/kernel.h>
25184 +#include <linux/perfmon_kern.h>
25185 +#include <linux/cpu.h>
25186 +#include "perfmon_priv.h"
25187 +
25188 +#ifndef CONFIG_HOTPLUG_CPU
25189 +void pfm_cpu_disable(void) /* stub used when CONFIG_HOTPLUG_CPU is not set */
25190 +{}
25191 +
25192 +int __init pfm_init_hotplug(void)
25193 +{
25194 + return 0; /* stub without CONFIG_HOTPLUG_CPU: nothing to register, report success */
25195 +}
25196 +#else /* CONFIG_HOTPLUG_CPU */
25197 +/*
25198 + * CPU hotplug event notification callback
25199 + *
25200 + * We use the callback to manage the per-CPU debugfs entries.
25201 + * Note that the actual shutdown of monitoring on the CPU
25202 + * is done in pfm_cpu_disable(), see comments there for more
25203 + * information.
25204 + */
25205 +static int pfm_cpu_notify(struct notifier_block *nfb,
25206 + unsigned long action, void *hcpu)
25207 +{
25208 + unsigned int cpu = (unsigned long)hcpu; /* hcpu carries the CPU number, not a pointer */
25209 + int ret = NOTIFY_OK; /* never changed: every event is accepted */
25210 +
25211 + pfm_pmu_conf_get(0); /* balanced by pfm_pmu_conf_put() below */
25212 +
25213 + switch (action) {
25214 + case CPU_ONLINE:
25215 + pfm_debugfs_add_cpu(cpu);
25216 + PFM_INFO("CPU%d is online", cpu);
25217 + break;
25218 + case CPU_UP_PREPARE:
25219 + PFM_INFO("CPU%d prepare online", cpu);
25220 + break;
25221 + case CPU_UP_CANCELED:
25222 + pfm_debugfs_del_cpu(cpu);
25223 + PFM_INFO("CPU%d is up canceled", cpu);
25224 + break;
25225 + case CPU_DOWN_PREPARE:
25226 + PFM_INFO("CPU%d prepare offline", cpu);
25227 + break;
25228 + case CPU_DOWN_FAILED:
25229 + PFM_INFO("CPU%d is down failed", cpu);
25230 + break;
25231 + case CPU_DEAD:
25232 + pfm_debugfs_del_cpu(cpu);
25233 + PFM_INFO("CPU%d is offline", cpu);
25234 + break;
25235 + } /* any other action is silently ignored */
25236 + pfm_pmu_conf_put();
25237 + return ret;
25238 +}
25239 +
25240 +/*
25241 + * called from cpu_disable() to detach the perfmon context
25242 + * from the CPU going down.
25243 + *
25244 + * We cannot use the cpu hotplug notifier because we MUST run
25245 + * on the CPU that is going down to save the PMU state
25246 + */
25247 +void pfm_cpu_disable(void)
25248 +{
25249 + struct pfm_context *ctx;
25250 + unsigned long flags;
25251 + int is_system, release_info = 0; /* release_info: bitmask filled by __pfm_unload_context() */
25252 + u32 cpu;
25253 + int r;
25254 +
25255 + ctx = __get_cpu_var(pmu_ctx);
25256 + if (ctx == NULL) /* no context recorded for this CPU: nothing to do */
25257 + return;
25258 +
25259 + is_system = ctx->flags.system;
25260 + cpu = ctx->cpu;
25261 +
25262 + /*
25263 + * context is LOADED or MASKED
25264 + *
25265 + * we unload from CPU. That stops monitoring and does
25266 + * all the bookkeeping of saving values and updating duration
25267 + */
25268 + spin_lock_irqsave(&ctx->lock, flags);
25269 + if (is_system) /* only system-wide contexts are unloaded here */
25270 + __pfm_unload_context(ctx, &release_info);
25271 + spin_unlock_irqrestore(&ctx->lock, flags);
25272 +
25273 + /*
25274 + * cancel timer (bit 0x2 of release_info requests it)
25275 + */
25276 + if (release_info & 0x2) {
25277 + r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
25278 + PFM_DBG("timeout cancel=%d", r);
25279 + }
25280 +
25281 + if (release_info & 0x1) /* bit 0x1: the session must be released */
25282 + pfm_session_release(is_system, cpu);
25283 +}
25284 +
25285 +static struct notifier_block pfm_cpu_notifier = { /* registered by pfm_init_hotplug() */
25286 + .notifier_call = pfm_cpu_notify
25287 +};
25288 +
25289 +int __init pfm_init_hotplug(void)
25290 +{
25291 + int ret = 0;
25292 + /*
25293 + * register CPU hotplug event notifier; its result is what we return
25294 + */
25295 + ret = register_cpu_notifier(&pfm_cpu_notifier);
25296 + if (!ret)
25297 + PFM_LOG("CPU hotplug support enabled");
25298 + return ret;
25299 +}
25300 +#endif /* CONFIG_HOTPLUG_CPU */
25301 diff --git a/perfmon/perfmon_init.c b/perfmon/perfmon_init.c
25302 new file mode 100644
25303 index 0000000..bbb6e4d
25304 --- /dev/null
25305 +++ b/perfmon/perfmon_init.c
25306 @@ -0,0 +1,131 @@
25307 +/*
25308 + * perfmon_init.c: perfmon2 global initialization functions
25309 + *
25310 + * This file implements the perfmon2 interface which
25311 + * provides access to the hardware performance counters
25312 + * of the host processor.
25313 + *
25314 + *
25315 + * The initial version of perfmon.c was written by
25316 + * Ganesh Venkitachalam, IBM Corp.
25317 + *
25318 + * Then it was modified for perfmon-1.x by Stephane Eranian and
25319 + * David Mosberger, Hewlett Packard Co.
25320 + *
25321 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
25322 + * by Stephane Eranian, Hewlett Packard Co.
25323 + *
25324 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
25325 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
25326 + * David Mosberger-Tang <davidm@hpl.hp.com>
25327 + *
25328 + * More information about perfmon available at:
25329 + * http://perfmon2.sf.net
25330 + *
25331 + * This program is free software; you can redistribute it and/or
25332 + * modify it under the terms of version 2 of the GNU General Public
25333 + * License as published by the Free Software Foundation.
25334 + *
25335 + * This program is distributed in the hope that it will be useful,
25336 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25337 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25338 + * General Public License for more details.
25339 + *
25340 + * You should have received a copy of the GNU General Public License
25341 + * along with this program; if not, write to the Free Software
25342 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25343 + * 02111-1307 USA
25344 + */
25345 +#include <linux/kernel.h>
25346 +#include <linux/perfmon_kern.h>
25347 +#include "perfmon_priv.h"
25348 +
25349 +/*
25350 + * external variables (defined here, used by the other perfmon files)
25351 + */
25352 +DEFINE_PER_CPU(struct task_struct *, pmu_owner);
25353 +DEFINE_PER_CPU(struct pfm_context *, pmu_ctx); /* read by pfm_cpu_disable() to find the context to detach */
25354 +DEFINE_PER_CPU(u64, pmu_activation_number);
25355 +DEFINE_PER_CPU(struct pfm_stats, pfm_stats);
25356 +DEFINE_PER_CPU(struct hrtimer, pfm_hrtimer); /* per-cpu high res timer, set up in __pfm_init_percpu() */
25357 +
25358 +
25359 +int perfmon_disabled; /* >0 if perfmon is disabled; set to 1 by pfm_init() on error */
25360 +
25361 +/*
25362 + * called from cpu_init() and pfm_pmu_register()
25363 + */
25364 +void __pfm_init_percpu(void *dummy)
25365 +{
25366 + struct hrtimer *h; /* this CPU's pfm_hrtimer; dummy is unused */
25367 +
25368 + h = &__get_cpu_var(pfm_hrtimer);
25369 +
25370 + pfm_arch_init_percpu();
25371 +
25372 + /*
25373 + * initialize per-cpu high res timer
25374 + */
25375 + hrtimer_init(h, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
25376 +#ifdef CONFIG_HIGH_RES_TIMERS
25377 + /*
25378 + * avoid potential deadlock on the runqueue lock
25379 + * during context switch when multiplexing. Situation
25380 + * arises on architectures which run switch_to() with
25381 + * the runqueue lock held, e.g., x86. On others, e.g.,
25382 + * IA-64, the problem does not exist.
25383 + * The callback mode is set to HRTIMER_CB_IRQSAFE_UNLOCKED
25384 + * so that the callback routine is only called in hardirq
25385 + * context, not in softirq, thus the context switch will not
25386 + * end up trying to wakeup the softirqd
25387 + */
25388 + h->cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
25389 +#endif
25390 + h->function = pfm_handle_switch_timeout; /* callback run when the timer expires */
25391 +}
25392 +
25393 +/*
25394 + * global initialization routine, executed only once (0 on success, else -1)
25395 + */
25396 +int __init pfm_init(void)
25397 +{
25398 + PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN);
25399 +
25400 + if (pfm_init_ctx())
25401 + goto error_disable;
25402 +
25403 +
25404 + if (pfm_init_sets())
25405 + goto error_disable;
25406 +
25407 + if (pfm_init_fs())
25408 + goto error_disable;
25409 +
25410 + if (pfm_init_sysfs())
25411 + goto error_disable;
25412 +
25413 + /* not critical, so no error checking */
25414 + pfm_init_debugfs();
25415 +
25416 + /*
25417 + * one time, arch-specific global initialization
25418 + */
25419 + if (pfm_arch_init())
25420 + goto error_disable;
25421 +
25422 + if (pfm_init_hotplug())
25423 + goto error_disable;
25424 + return 0;
25425 +
25426 +error_disable: /* earlier successful steps are not rolled back here */
25427 + PFM_ERR("perfmon is disabled due to initialization error");
25428 + perfmon_disabled = 1; /* checked e.g. by pfm_fmt_register() */
25429 + return -1;
25430 +}
25431 +
25432 +/*
25433 + * must use subsys_initcall() to ensure that the perfmon2 core
25434 + * is initialized before any PMU description module when they are
25435 + * compiled in.
25436 + */
25437 +subsys_initcall(pfm_init);
25438 diff --git a/perfmon/perfmon_intr.c b/perfmon/perfmon_intr.c
25439 new file mode 100644
25440 index 0000000..c5e3cda
25441 --- /dev/null
25442 +++ b/perfmon/perfmon_intr.c
25443 @@ -0,0 +1,648 @@
25444 +/*
25445 + * perfmon_intr.c: perfmon2 interrupt handling
25446 + *
25447 + * This file implements the perfmon2 interface which
25448 + * provides access to the hardware performance counters
25449 + * of the host processor.
25450 + *
25451 + * The initial version of perfmon.c was written by
25452 + * Ganesh Venkitachalam, IBM Corp.
25453 + *
25454 + * Then it was modified for perfmon-1.x by Stephane Eranian and
25455 + * David Mosberger, Hewlett Packard Co.
25456 + *
25457 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
25458 + * by Stephane Eranian, Hewlett Packard Co.
25459 + *
25460 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
25461 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
25462 + * David Mosberger-Tang <davidm@hpl.hp.com>
25463 + *
25464 + * More information about perfmon available at:
25465 + * http://perfmon2.sf.net
25466 + *
25467 + * This program is free software; you can redistribute it and/or
25468 + * modify it under the terms of version 2 of the GNU General Public
25469 + * License as published by the Free Software Foundation.
25470 + *
25471 + * This program is distributed in the hope that it will be useful,
25472 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
25473 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25474 + * General Public License for more details.
25475 + *
25476 + * You should have received a copy of the GNU General Public License
25477 + * along with this program; if not, write to the Free Software
25478 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25479 + * 02111-1307 USA
25480 + */
25481 +#include <linux/kernel.h>
25482 +#include <linux/module.h>
25483 +#include <linux/perfmon_kern.h>
25484 +#include "perfmon_priv.h"
25485 +
25486 +/**
25487 + * pfm_intr_process_64bit_ovfls - handle 64-bit counter emulation
25488 + * @ctx: context to operate on
25489 + * @set: set to operate on
25490 + * @ovfl_ctrl: PFM_OVFL_CTRL_SWITCH is or'ed in when a set switch is due
25491 + *
25492 + * The function returns the number of 64-bit overflows detected.
25493 + *
25494 + * 64-bit software pmds are updated for overflowed pmd registers and
25495 + * set->reset_pmds is updated to the list of pmds to reset.
25496 + * In any case, set->npend_ovfls and set->povfl_pmds are cleared.
25497 + */
25498 +static u16 pfm_intr_process_64bit_ovfls(struct pfm_context *ctx,
25499 + struct pfm_event_set *set,
25500 + u32 *ovfl_ctrl)
25501 +{
25502 + u16 i, num_ovfls, max_pmd, max_intr;
25503 + u16 num_64b_ovfls, has_ovfl_sw, must_switch;
25504 + u64 ovfl_thres, old_val, new_val, ovfl_mask;
25505 +
25506 + num_64b_ovfls = must_switch = 0;
25507 +
25508 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
25509 + max_pmd = ctx->regs.max_pmd;
25510 + max_intr = ctx->regs.max_intr_pmd;
25511 +
25512 + num_ovfls = set->npend_ovfls; /* pending overflows still to be found in povfl_pmds */
25513 + has_ovfl_sw = set->flags & PFM_SETFL_OVFL_SWITCH;
25514 +
25515 + bitmap_zero(cast_ulp(set->reset_pmds), max_pmd); /* rebuilt from scratch below */
25516 +
25517 + for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) { /* ends once every pending overflow was seen */
25518 + /*
25519 + * skip pmd which did not overflow
25520 + */
25521 + if (!test_bit(i, cast_ulp(set->povfl_pmds)))
25522 + continue;
25523 +
25524 + num_ovfls--;
25525 +
25526 + /*
25527 + * Update software value for counters ONLY
25528 + *
25529 + * Note that the pmd is not necessarily 0 at this point as
25530 + * qualified events may have happened before the PMU was
25531 + * frozen. The residual count is not taken into consideration
25532 + * here but will be with any read of the pmd
25533 + */
25534 + ovfl_thres = set->pmds[i].ovflsw_thres;
25535 +
25536 + if (likely(test_bit(i, cast_ulp(ctx->regs.cnt_pmds)))) {
25537 + old_val = new_val = set->pmds[i].value;
25538 + new_val += 1 + ovfl_mask; /* a wrap (old_val > new_val) is the 64-bit overflow tested below */
25539 + set->pmds[i].value = new_val;
25540 + } else {
25541 + /*
25542 + * for non counters which interrupt, e.g., AMD IBS,
25543 + * we consider this equivalent to a 64-bit counter
25544 + * overflow.
25545 + */
25546 + old_val = 1; new_val = 0; /* forces old_val > new_val */
25547 + }
25548 +
25549 + /*
25550 + * check for 64-bit overflow condition
25551 + */
25552 + if (likely(old_val > new_val)) {
25553 + num_64b_ovfls++;
25554 + if (has_ovfl_sw && ovfl_thres > 0) {
25555 + if (ovfl_thres == 1) /* threshold is about to reach 0 */
25556 + must_switch = 1;
25557 + set->pmds[i].ovflsw_thres = ovfl_thres - 1;
25558 + }
25559 +
25560 + /*
25561 + * what to reset because of this overflow
25562 + * - the overflowed register
25563 + * - its reset_pmds (set->pmds[i].reset_pmds)
25564 + */
25565 + __set_bit(i, cast_ulp(set->reset_pmds));
25566 +
25567 + bitmap_or(cast_ulp(set->reset_pmds),
25568 + cast_ulp(set->reset_pmds),
25569 + cast_ulp(set->pmds[i].reset_pmds),
25570 + max_pmd);
25571 + } else {
25572 + /*
25573 + * only keep track of 64-bit overflows or
25574 + * assimilated
25575 + */
25576 + __clear_bit(i, cast_ulp(set->povfl_pmds));
25577 +
25578 + /*
25579 + * on some PMU, it may be necessary to re-arm the PMD
25580 + */
25581 + pfm_arch_ovfl_reset_pmd(ctx, i);
25582 + }
25583 +
25584 + PFM_DBG_ovfl("ovfl=%s pmd%u new=0x%llx old=0x%llx "
25585 + "hw_pmd=0x%llx o_pmds=0x%llx must_switch=%u "
25586 + "o_thres=%llu o_thres_ref=%llu",
25587 + old_val > new_val ? "64-bit" : "HW",
25588 + i,
25589 + (unsigned long long)new_val,
25590 + (unsigned long long)old_val,
25591 + (unsigned long long)pfm_read_pmd(ctx, i),
25592 + (unsigned long long)set->povfl_pmds[0],
25593 + must_switch,
25594 + (unsigned long long)set->pmds[i].ovflsw_thres,
25595 + (unsigned long long)set->pmds[i].ovflsw_ref_thres);
25596 + }
25597 + /*
25598 + * update public bitmask of 64-bit overflowed pmds
25599 + */
25600 + if (num_64b_ovfls)
25601 + bitmap_copy(cast_ulp(set->ovfl_pmds), cast_ulp(set->povfl_pmds),
25602 + max_intr);
25603 +
25604 + if (must_switch)
25605 + *ovfl_ctrl |= PFM_OVFL_CTRL_SWITCH;
25606 +
25607 + /*
25608 + * mark the overflows as consumed
25609 + */
25610 + set->npend_ovfls = 0;
25611 + bitmap_zero(cast_ulp(set->povfl_pmds), max_intr);
25612 +
25613 + return num_64b_ovfls;
25614 +}
25615 +
25616 +/**
25617 + * pfm_intr_get_smpl_pmds_values - copy 64-bit pmd values for sampling format
25618 + * @ctx: context to work on
25619 + * @set: current event set
25620 + * @arg: overflow arg to be passed to format
25621 + * @smpl_pmds: list of PMDs of interest for the overflowed register
25622 + *
25623 + * build an array of 64-bit PMD values based on smpl_pmds. Values are
25624 + * stored in increasing order of the PMD indexes
25625 + */
25626 +static void pfm_intr_get_smpl_pmds_values(struct pfm_context *ctx,
25627 + struct pfm_event_set *set,
25628 + struct pfm_ovfl_arg *arg,
25629 + u64 *smpl_pmds)
25630 +{
25631 + u16 j, k, max_pmd; /* j: pmd index scanned, k: next free slot in arg->smpl_pmds_values */
25632 + u64 new_val, ovfl_mask;
25633 + u64 *cnt_pmds;
25634 +
25635 + cnt_pmds = ctx->regs.cnt_pmds;
25636 + max_pmd = ctx->regs.max_pmd;
25637 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
25638 +
25639 + for (j = k = 0; j < max_pmd; j++) {
25640 +
25641 + if (!test_bit(j, cast_ulp(smpl_pmds)))
25642 + continue;
25643 +
25644 + new_val = pfm_read_pmd(ctx, j);
25645 +
25646 + /* for counters, build 64-bit value: software bits above ovfl_mask, pmd bits below */
25647 + if (test_bit(j, cast_ulp(cnt_pmds)))
25648 + new_val = (set->pmds[j].value & ~ovfl_mask)
25649 + | (new_val & ovfl_mask);
25650 +
25651 + arg->smpl_pmds_values[k++] = new_val;
25652 +
25653 + PFM_DBG_ovfl("s_pmd_val[%u]=pmd%u=0x%llx", k, j, /* NOTE(review): k was already incremented, so this prints slot + 1 */
25654 + (unsigned long long)new_val);
25655 + }
25656 + arg->num_smpl_pmds = k; /* number of values stored */
25657 +}
25658 +
25659 +/**
25660 + * pfm_intr_process_smpl_fmt -- handle sampling format callback
25661 + * @ctx: context to work on
25662 + * @set: current event set
25663 + * @ip: interrupted instruction pointer
25664 + * @now: timestamp
25665 + * @num_ovfls: number of 64-bit overflows
25666 + * @ovfl_ctrl: set of controls for interrupt handler tail processing
25667 + * @regs: register state
25668 + *
25669 + * Prepare argument (ovfl_arg) to be passed to sampling format callback, then
25670 + * invoke the callback (fmt_handler). Returns the last callback return value.
25671 + */
25672 +static int pfm_intr_process_smpl_fmt(struct pfm_context *ctx,
25673 + struct pfm_event_set *set,
25674 + unsigned long ip,
25675 + u64 now,
25676 + u64 num_ovfls,
25677 + u32 *ovfl_ctrl,
25678 + struct pt_regs *regs)
25679 +{
25680 + struct pfm_ovfl_arg *ovfl_arg;
25681 + u64 start_cycles, end_cycles; /* sched_clock() readings around the handler call */
25682 + u16 i, max_pmd;
25683 + int ret = 0;
25684 +
25685 + ovfl_arg = &ctx->ovfl_arg; /* argument block lives in the context; the handler gets ctx */
25686 +
25687 + ovfl_arg->active_set = set->id;
25688 + max_pmd = ctx->regs.max_pmd;
25689 +
25690 + /*
25691 + * first_intr_pmd: first PMD which can generate PMU interrupts
25692 + */
25693 + for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) {
25694 + /*
25695 + * skip pmd which did not have 64-bit overflows
25696 + */
25697 + if (!test_bit(i, cast_ulp(set->ovfl_pmds)))
25698 + continue;
25699 +
25700 + num_ovfls--;
25701 +
25702 + /*
25703 + * prepare argument to fmt_handler
25704 + */
25705 + ovfl_arg->ovfl_pmd = i;
25706 + ovfl_arg->ovfl_ctrl = 0;
25707 +
25708 + ovfl_arg->pmd_last_reset = set->pmds[i].lval;
25709 + ovfl_arg->pmd_eventid = set->pmds[i].eventid;
25710 + ovfl_arg->num_smpl_pmds = 0;
25711 +
25712 + /*
25713 + * copy values of pmds of interest, if any
25714 + * Sampling format may use them
25715 + * We do not initialize the unused smpl_pmds_values
25716 + */
25717 + if (!bitmap_empty(cast_ulp(set->pmds[i].smpl_pmds), max_pmd))
25718 + pfm_intr_get_smpl_pmds_values(ctx, set, ovfl_arg,
25719 + set->pmds[i].smpl_pmds);
25720 +
25721 + pfm_stats_inc(fmt_handler_calls);
25722 +
25723 + /*
25724 + * call format record (handler) routine
25725 + */
25726 + start_cycles = sched_clock();
25727 + ret = (*ctx->smpl_fmt->fmt_handler)(ctx, ip, now, regs);
25728 + end_cycles = sched_clock();
25729 +
25730 + /*
25731 + * The reset_pmds mask is constructed automatically
25732 + * on overflow. When the actual reset takes place
25733 + * depends on the masking, switch and notification
25734 + * status. It may be deferred until pfm_restart().
25735 + */
25736 + *ovfl_ctrl |= ovfl_arg->ovfl_ctrl; /* union of the controls requested by each call */
25737 +
25738 + pfm_stats_add(fmt_handler_ns, end_cycles - start_cycles);
25739 + }
25740 + /*
25741 + * when the format cannot handle the rest of the overflow, we abort
25742 + */
25743 + if (ret) /* NOTE(review): the loop above does not stop on ret != 0; only the last ret is seen here -- confirm intended */
25744 + PFM_DBG_ovfl("handler aborted at PMD%u ret=%d", i, ret);
25745 + return ret;
25746 +}
25747 +/**
25748 + * pfm_overflow_handler - main overflow processing routine.
25749 + * @ctx: context to work on (always current context)
25750 + * @set: current event set
25751 + * @ip: interrupt instruction pointer
25752 + * @regs: machine state
25753 + *
25754 + * set->num_ovfl_pmds is 0 when returning from this function even though
25755 + * set->ovfl_pmds[] may have bits set. When leaving set->num_ovfl_pmds
25756 + * must never be used to determine if there was a pending overflow.
25757 + */
25758 +static void pfm_overflow_handler(struct pfm_context *ctx,
25759 + struct pfm_event_set *set,
25760 + unsigned long ip,
25761 + struct pt_regs *regs)
25762 +{
25763 + struct pfm_event_set *set_orig;
25764 + u64 now;
25765 + u32 ovfl_ctrl;
25766 + u16 max_intr, max_pmd;
25767 + u16 num_ovfls;
25768 + int ret, has_notify;
25769 +
25770 + /*
25771 + * take timestamp
25772 + */
25773 + now = sched_clock();
25774 +
25775 + max_pmd = ctx->regs.max_pmd;
25776 + max_intr = ctx->regs.max_intr_pmd;
25777 +
25778 + set_orig = set;
25779 + ovfl_ctrl = 0;
25780 +
25781 + /*
25782 + * skip ZOMBIE case
25783 + */
25784 + if (unlikely(ctx->state == PFM_CTX_ZOMBIE))
25785 + goto stop_monitoring;
25786 +
25787 + PFM_DBG_ovfl("intr_pmds=0x%llx npend=%u ip=%p, blocking=%d "
25788 + "u_pmds=0x%llx use_fmt=%u",
25789 + (unsigned long long)set->povfl_pmds[0],
25790 + set->npend_ovfls,
25791 + (void *)ip,
25792 + ctx->flags.block,
25793 + (unsigned long long)set->used_pmds[0],
25794 + !!ctx->smpl_fmt);
25795 +
25796 + /*
25797 + * return number of 64-bit overflows
25798 + */
25799 + num_ovfls = pfm_intr_process_64bit_ovfls(ctx, set, &ovfl_ctrl);
25800 +
25801 + /*
25802 + * there were no 64-bit overflows
25803 + * nothing else to do
25804 + */
25805 + if (!num_ovfls)
25806 + return;
25807 +
25808 + /*
25809 + * tmp_ovfl_notify = ovfl_pmds & ovfl_notify
25810 + * with:
25811 + * - ovfl_pmds: last 64-bit overflowed pmds
25812 + * - ovfl_notify: notify on overflow registers
25813 + */
25814 + bitmap_and(cast_ulp(ctx->tmp_ovfl_notify),
25815 + cast_ulp(set->ovfl_pmds),
25816 + cast_ulp(set->ovfl_notify),
25817 + max_intr);
25818 +
25819 + has_notify = !bitmap_empty(cast_ulp(ctx->tmp_ovfl_notify), max_intr);
25820 +
25821 + /*
25822 + * check for sampling format and invoke fmt_handler
25823 + */
25824 + if (likely(ctx->smpl_fmt)) {
25825 + pfm_intr_process_smpl_fmt(ctx, set, ip, now, num_ovfls,
25826 + &ovfl_ctrl, regs);
25827 + } else {
25828 + /*
25829 + * When no sampling format is used, the default
25830 + * is:
25831 + * - mask monitoring if not switching
25832 + * - notify user if requested
25833 + *
25834 + * If notification is not requested, monitoring is masked
25835 + * and overflowed registers are not reset (saturation).
25836 + * This mimics the behavior of the default sampling format.
25837 + */
25838 + ovfl_ctrl |= PFM_OVFL_CTRL_NOTIFY;
25839 + if (has_notify || !(ovfl_ctrl & PFM_OVFL_CTRL_SWITCH))
25840 + ovfl_ctrl |= PFM_OVFL_CTRL_MASK;
25841 + }
25842 +
25843 + PFM_DBG_ovfl("set%u o_notify=0x%llx o_pmds=0x%llx "
25844 + "r_pmds=0x%llx ovfl_ctrl=0x%x",
25845 + set->id,
25846 + (unsigned long long)ctx->tmp_ovfl_notify[0],
25847 + (unsigned long long)set->ovfl_pmds[0],
25848 + (unsigned long long)set->reset_pmds[0],
25849 + ovfl_ctrl);
25850 +
25851 + /*
25852 + * execute the various controls
25853 + * ORDER MATTERS
25854 + */
25855 +
25856 +
25857 + /*
25858 + * mask monitoring
25859 + */
25860 + if (ovfl_ctrl & PFM_OVFL_CTRL_MASK) {
25861 + pfm_mask_monitoring(ctx, set);
25862 + /*
25863 + * when masking, reset is deferred until
25864 + * pfm_restart()
25865 + */
25866 + ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET;
25867 +
25868 + /*
25869 + * when masking, switching is deferred until
25870 + * pfm_restart and we need to remember it
25871 + */
25872 + if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) {
25873 + set->priv_flags |= PFM_SETFL_PRIV_SWITCH;
25874 + ovfl_ctrl &= ~PFM_OVFL_CTRL_SWITCH;
25875 + }
25876 + }
25877 +
25878 + /*
25879 + * switch event set
25880 + */
25881 + if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) {
25882 + pfm_switch_sets_from_intr(ctx);
25883 + /* update view of active set */
25884 + set = ctx->active_set;
25885 + }
25886 + /*
25887 + * send overflow notification
25888 + *
25889 + * only necessary if at least one overflowed
25890 + * register had the notify flag set
25891 + */
25892 + if (has_notify && (ovfl_ctrl & PFM_OVFL_CTRL_NOTIFY)) {
25893 + /*
25894 + * block on notify, not on masking
25895 + */
25896 + if (ctx->flags.block)
25897 + pfm_post_work(current, ctx, PFM_WORK_BLOCK);
25898 +
25899 + /*
25900 + * send notification and passed original set id
25901 + * if error, queue full, for instance, then default
25902 + * to masking monitoring, i.e., saturate
25903 + */
25904 + ret = pfm_ovfl_notify(ctx, set_orig, ip);
25905 + if (unlikely(ret)) {
25906 + if (ctx->state == PFM_CTX_LOADED) {
25907 + pfm_mask_monitoring(ctx, set);
25908 + ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET;
25909 + }
25910 + } else {
25911 + ctx->flags.can_restart++;
25912 + PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart);
25913 + }
25914 + }
25915 +
25916 + /*
25917 + * reset overflowed registers
25918 + */
25919 + if (ovfl_ctrl & PFM_OVFL_CTRL_RESET) {
25920 + u16 nn;
25921 + nn = bitmap_weight(cast_ulp(set->reset_pmds), max_pmd);
25922 + if (nn)
25923 + pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_SHORT);
25924 + }
25925 + return;
25926 +
25927 +stop_monitoring:
25928 + /*
25929 + * Does not happen for a system-wide context nor for a
25930 + * self-monitored context. We cannot attach to kernel-only
25931 + * thread, thus it is safe to set TIF bits, i.e., the thread
25932 + * will eventually leave the kernel or die and either we will
25933 + * catch the context and clean it up in pfm_handler_work() or
25934 + * pfm_exit_thread().
25935 + *
25936 + * Mask until we get to pfm_handle_work()
25937 + */
25938 + pfm_mask_monitoring(ctx, set);
25939 +
25940 + PFM_DBG_ovfl("ctx is zombie, converted to spurious");
25941 + pfm_post_work(current, ctx, PFM_WORK_ZOMBIE);
25942 +}
25943 +
25944 +/**
25945 + * __pfm_interrupt_handler - 1st level interrupt handler
25946 + * @ip: interrupted instruction pointer
25947 + * @regs: machine state
25948 + *
25949 + * Function is static because we use a wrapper to easily capture timing infos.
25950 + *
25951 + *
25952 + * Context locking necessary to avoid concurrent accesses from other CPUs
25953 + * - For per-thread, we must prevent pfm_restart() which works when
25954 + * context is LOADED or MASKED
25955 + */
25956 +static void __pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs)
25957 +{
25958 + struct task_struct *task;
25959 + struct pfm_context *ctx;
25960 + struct pfm_event_set *set;
25961 +
25962 +
25963 + task = __get_cpu_var(pmu_owner);
25964 + ctx = __get_cpu_var(pmu_ctx);
25965 +
25966 + /*
25967 + * verify if there is a context on this CPU
25968 + */
25969 + if (unlikely(ctx == NULL)) {
25970 + PFM_DBG_ovfl("no ctx");
25971 + goto spurious;
25972 + }
25973 +
25974 + /*
25975 + * we need to lock context because it could be accessed
25976 + * from another CPU. Depending on the priority level of
25977 + * the PMU interrupt or the arch, it may be necessary to
25978 + * mask interrupts altogether to avoid a race condition with
25979 + * the timer interrupt in case of time-based set switching,
25980 + * for instance.
25981 + */
25982 + spin_lock(&ctx->lock);
25983 +
25984 + set = ctx->active_set;
25985 +
25986 + /*
25987 + * For SMP per-thread, it is not possible to have
25988 + * owner != NULL && task != current.
25989 + *
25990 + * For UP per-thread, because of lazy save, it
25991 + * is possible to receive an interrupt in another task
25992 + * which is not using the PMU. This means
25993 + * that the interrupt was in-flight at the
25994 + * time of pfm_ctxswout_thread(). In that
25995 + * case, it will be replayed when the task
25996 + * is scheduled again. Hence we convert to spurious.
25997 + *
25998 + * The basic rule is that an overflow is always
25999 + * processed in the context of the task that
26000 + * generated it for all per-thread contexts.
26001 + *
26002 + * for system-wide, task is always NULL
26003 + */
26004 +#ifndef CONFIG_SMP
26005 + if (unlikely((task && current->pfm_context != ctx))) {
26006 + PFM_DBG_ovfl("spurious: not owned by current task");
26007 + goto spurious;
26008 + }
26009 +#endif
26010 + if (unlikely(ctx->state == PFM_CTX_MASKED)) {
26011 + PFM_DBG_ovfl("spurious: monitoring masked");
26012 + goto spurious;
26013 + }
26014 +
26015 + /*
26016 + * check that monitoring is active, otherwise convert
26017 + * to spurious
26018 + */
26019 + if (unlikely(!pfm_arch_is_active(ctx))) {
26020 + PFM_DBG_ovfl("spurious: monitoring non active");
26021 + goto spurious;
26022 + }
26023 +
26024 + /*
26025 + * freeze PMU and collect overflowed PMD registers
26026 + * into set->povfl_pmds. Number of overflowed PMDs
26027 + * reported in set->npend_ovfls
26028 + */
26029 + pfm_arch_intr_freeze_pmu(ctx, set);
26030 +
26031 + /*
26032 + * no overflow detected, interrupt may have come
26033 + * from the previous thread running on this CPU
26034 + */
26035 + if (unlikely(!set->npend_ovfls)) {
26036 + PFM_DBG_ovfl("no npend_ovfls");
26037 + goto spurious;
26038 + }
26039 +
26040 + pfm_stats_inc(ovfl_intr_regular_count);
26041 +
26042 + /*
26043 + * invoke actual handler
26044 + */
26045 + pfm_overflow_handler(ctx, set, ip, regs);
26046 +
26047 + /*
26048 + * unfreeze PMU, monitoring may not actually be restarted
26049 + * if context is MASKED
26050 + */
26051 + pfm_arch_intr_unfreeze_pmu(ctx);
26052 +
26053 + spin_unlock(&ctx->lock);
26054 +
26055 + return;
26056 +
26057 +spurious:
26058 + /* ctx may be NULL */
26059 + pfm_arch_intr_unfreeze_pmu(ctx);
26060 + if (ctx)
26061 + spin_unlock(&ctx->lock);
26062 +
26063 + pfm_stats_inc(ovfl_intr_spurious_count);
26064 +}
26065 +
26066 +
26067 +/**
26068 + * pfm_interrupt_handler - 1st level interrupt handler
26069 + * @ip: interrupt instruction pointer
26070 + * @regs: machine state
26071 + *
26072 + * Function called from the low-level assembly code or arch-specific perfmon
26073 + * code. Simple wrapper used for timing purpose. Actual work done in
26074 + * __pfm_interrupt_handler()
26075 + */
26076 +void pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs)
26077 +{
26078 + u64 start;
26079 +
26080 + pfm_stats_inc(ovfl_intr_all_count);
26081 +
26082 + BUG_ON(!irqs_disabled());
26083 +
26084 + start = sched_clock();
26085 +
26086 + __pfm_interrupt_handler(ip, regs);
26087 +
26088 + pfm_stats_add(ovfl_intr_ns, sched_clock() - start);
26089 +}
26090 +EXPORT_SYMBOL(pfm_interrupt_handler);
26091 +
26092 diff --git a/perfmon/perfmon_msg.c b/perfmon/perfmon_msg.c
26093 new file mode 100644
26094 index 0000000..b8a1e4c
26095 --- /dev/null
26096 +++ b/perfmon/perfmon_msg.c
26097 @@ -0,0 +1,229 @@
26098 +/*
26099 + * perfmon_msg.c: perfmon2 notification message queue management
26100 + *
26101 + * This file implements the perfmon2 interface which
26102 + * provides access to the hardware performance counters
26103 + * of the host processor.
26104 + *
26105 + * The initial version of perfmon.c was written by
26106 + * Ganesh Venkitachalam, IBM Corp.
26107 + *
26108 + * Then it was modified for perfmon-1.x by Stephane Eranian and
26109 + * David Mosberger, Hewlett Packard Co.
26110 + *
26111 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
26112 + * by Stephane Eranian, Hewlett Packard Co.
26113 + *
26114 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
26115 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
26116 + * David Mosberger-Tang <davidm@hpl.hp.com>
26117 + *
26118 + * More information about perfmon available at:
26119 + * http://perfmon2.sf.net
26120 + *
26121 + * This program is free software; you can redistribute it and/or
26122 + * modify it under the terms of version 2 of the GNU General Public
26123 + * License as published by the Free Software Foundation.
26124 + *
26125 + * This program is distributed in the hope that it will be useful,
26126 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26127 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26128 + * General Public License for more details.
26129 + *
26130 + * You should have received a copy of the GNU General Public License
26131 + * along with this program; if not, write to the Free Software
26132 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26133 + * 02111-1307 USA
26134 + */
26135 +#include <linux/kernel.h>
26136 +#include <linux/poll.h>
26137 +#include <linux/perfmon_kern.h>
26138 +
26139 +/**
26140 + * pfm_get_new_msg - get a new message slot from the queue
26141 + * @ctx: context to operate on
26142 + *
26143 + * if queue is full, NULL is returned
26144 + */
26145 +static union pfarg_msg *pfm_get_new_msg(struct pfm_context *ctx)
26146 +{
26147 + int next;
26148 +
26149 + next = ctx->msgq_head & PFM_MSGQ_MASK;
26150 +
26151 + if ((ctx->msgq_head - ctx->msgq_tail) == PFM_MSGS_COUNT)
26152 + return NULL;
26153 +
26154 + /*
26155 + * move to next possible slot
26156 + */
26157 + ctx->msgq_head++;
26158 +
26159 + PFM_DBG_ovfl("head=%d tail=%d msg=%d",
26160 + ctx->msgq_head & PFM_MSGQ_MASK,
26161 + ctx->msgq_tail & PFM_MSGQ_MASK,
26162 + next);
26163 +
26164 + return ctx->msgq+next;
26165 +}
26166 +
26167 +/**
26168 + * pfm_notify_user - wake up any thread waiting on msg queue, post SIGIO
26169 + * @ctx: context to operate on
26170 + *
26171 + * message is already enqueued
26172 + */
26173 +static void pfm_notify_user(struct pfm_context *ctx)
26174 +{
26175 + if (ctx->state == PFM_CTX_ZOMBIE) {
26176 + PFM_DBG("no notification, context is zombie");
26177 + return;
26178 + }
26179 +
26180 + PFM_DBG_ovfl("waking up");
26181 +
26182 + wake_up_interruptible(&ctx->msgq_wait);
26183 +
26184 + /*
26185 + * it is safe to call kill_fasync() from an interrupt
26186 + * handler. kill_fasync() grabs two RW locks (fasync_lock,
26187 + * tasklist_lock) in read mode. There is conflict only in
26188 + * case the PMU interrupt occurs during a write mode critical
26189 + * section. This cannot happen because for both locks, the
26190 + * write mode is always using interrupt masking (write_lock_irq).
26191 + */
26192 + kill_fasync(&ctx->async_queue, SIGIO, POLL_IN);
26193 +}
26194 +
26195 +/**
26196 + * pfm_ovfl_notify - send overflow notification
26197 + * @ctx: context to operate on
26198 + * @set: which set the overflow comes from
26199 + * @ip: overflow interrupt instruction address (IIP)
26200 + *
26201 + * Appends an overflow notification message to context queue.
26202 + * calls pfm_notify_user() to wake up any threads and/or send a signal
26203 + *
26204 + * Context is locked and interrupts are disabled (no preemption).
26205 + */
26206 +int pfm_ovfl_notify(struct pfm_context *ctx,
26207 + struct pfm_event_set *set,
26208 + unsigned long ip)
26209 +{
26210 + union pfarg_msg *msg = NULL;
26211 + u64 *ovfl_pmds;
26212 +
26213 + if (!ctx->flags.no_msg) {
26214 + msg = pfm_get_new_msg(ctx);
26215 + if (msg == NULL) {
26216 + /*
26217 + * when message queue fills up it is because the user
26218 + * did not extract the message, yet issued
26219 + * pfm_restart(). At this point, we stop sending
26220 + * notification, thus the user will not be able to get
26221 + * new samples when using the default format.
26222 + */
26223 + PFM_DBG_ovfl("no more notification msgs");
26224 + return -1;
26225 + }
26226 +
26227 + msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL;
26228 + msg->pfm_ovfl_msg.msg_ovfl_pid = current->pid;
26229 + msg->pfm_ovfl_msg.msg_active_set = set->id;
26230 +
26231 + ovfl_pmds = msg->pfm_ovfl_msg.msg_ovfl_pmds;
26232 +
26233 + /*
26234 + * copy bitmask of all pmd that interrupted last
26235 + */
26236 + bitmap_copy(cast_ulp(ovfl_pmds), cast_ulp(set->ovfl_pmds),
26237 + ctx->regs.max_intr_pmd);
26238 +
26239 + msg->pfm_ovfl_msg.msg_ovfl_cpu = smp_processor_id();
26240 + msg->pfm_ovfl_msg.msg_ovfl_tid = current->tgid;
26241 + msg->pfm_ovfl_msg.msg_ovfl_ip = ip;
26242 +
26243 + pfm_stats_inc(ovfl_notify_count);
26244 + }
26245 +
26246 + PFM_DBG_ovfl("ip=0x%lx o_pmds=0x%llx",
26247 + ip,
26248 + (unsigned long long)set->ovfl_pmds[0]);
26249 +
26250 + pfm_notify_user(ctx);
26251 + return 0;
26252 +}
26253 +
26254 +/**
26255 + * pfm_end_notify - notify of thread termination
26256 + * @ctx: context to operate on
26257 + *
26258 + * In per-thread mode, when not self-monitoring, perfmon
26259 + * sends a 'end' notification message when the monitored
26260 + * thread where the context is attached is exiting.
26261 + *
26262 + * This helper message alleviates the need to track the activity
26263 + * of the thread/process when it is not directly related, i.e.,
26264 + * was attached. In other words, no need to keep the thread
26265 + * ptraced.
26266 + *
26267 + * The context must be locked and interrupts disabled.
26268 + */
26269 +int pfm_end_notify(struct pfm_context *ctx)
26270 +{
26271 + union pfarg_msg *msg;
26272 +
26273 + msg = pfm_get_new_msg(ctx);
26274 + if (msg == NULL) {
26275 + PFM_ERR("%s no more msgs", __func__);
26276 + return -1;
26277 + }
26278 + /* no leak */
26279 + memset(msg, 0, sizeof(*msg));
26280 +
26281 + msg->type = PFM_MSG_END;
26282 +
26283 + PFM_DBG("end msg: msg=%p no_msg=%d",
26284 + msg,
26285 + ctx->flags.no_msg);
26286 +
26287 + pfm_notify_user(ctx);
26288 + return 0;
26289 +}
26290 +
26291 +/**
26292 + * pfm_get_next_msg - copy the oldest message from the queue and move tail
26293 + * @ctx: context to use
26294 + * @m: where to copy the message into
26295 + *
26296 + * The tail of the queue is moved as a consequence of this call
26297 + */
26298 +void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m)
26299 +{
26300 + union pfarg_msg *next;
26301 +
26302 + PFM_DBG_ovfl("in head=%d tail=%d",
26303 + ctx->msgq_head & PFM_MSGQ_MASK,
26304 + ctx->msgq_tail & PFM_MSGQ_MASK);
26305 +
26306 + /*
26307 + * get oldest message
26308 + */
26309 + next = ctx->msgq + (ctx->msgq_tail & PFM_MSGQ_MASK);
26310 +
26311 + /*
26312 + * move tail forward
26313 + */
26314 + ctx->msgq_tail++;
26315 +
26316 + /*
26317 + * copy message, we cannot simply point to it
26318 + * as it may be re-used before we copy it out
26319 + */
26320 + *m = *next;
26321 +
26322 + PFM_DBG_ovfl("out head=%d tail=%d type=%d",
26323 + ctx->msgq_head & PFM_MSGQ_MASK,
26324 + ctx->msgq_tail & PFM_MSGQ_MASK,
26325 + m->type);
26326 +}
26327 diff --git a/perfmon/perfmon_pmu.c b/perfmon/perfmon_pmu.c
26328 new file mode 100644
26329 index 0000000..df7a9c9
26330 --- /dev/null
26331 +++ b/perfmon/perfmon_pmu.c
26332 @@ -0,0 +1,590 @@
26333 +/*
26334 + * perfmon_pmu.c: perfmon2 PMU configuration management
26335 + *
26336 + * This file implements the perfmon2 interface which
26337 + * provides access to the hardware performance counters
26338 + * of the host processor.
26339 + *
26340 + * The initial version of perfmon.c was written by
26341 + * Ganesh Venkitachalam, IBM Corp.
26342 + *
26343 + * Then it was modified for perfmon-1.x by Stephane Eranian and
26344 + * David Mosberger, Hewlett Packard Co.
26345 + *
26346 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
26347 + * by Stephane Eranian, Hewlett Packard Co.
26348 + *
26349 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
26350 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
26351 + * David Mosberger-Tang <davidm@hpl.hp.com>
26352 + *
26353 + * More information about perfmon available at:
26354 + * http://perfmon2.sf.net
26355 + *
26356 + * This program is free software; you can redistribute it and/or
26357 + * modify it under the terms of version 2 of the GNU General Public
26358 + * License as published by the Free Software Foundation.
26359 + *
26360 + * This program is distributed in the hope that it will be useful,
26361 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26362 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26363 + * General Public License for more details.
26364 + *
26365 + * You should have received a copy of the GNU General Public License
26366 + * along with this program; if not, write to the Free Software
26367 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26368 + * 02111-1307 USA
26369 + */
26370 +#include <linux/module.h>
26371 +#include <linux/perfmon_kern.h>
26372 +#include "perfmon_priv.h"
26373 +
26374 +#ifndef CONFIG_MODULE_UNLOAD
26375 +#define module_refcount(n) 1
26376 +#endif
26377 +
26378 +static __cacheline_aligned_in_smp int request_mod_in_progress;
26379 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_conf_lock);
26380 +
26381 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_acq_lock);
26382 +static u32 pfm_pmu_acquired;
26383 +
26384 +/*
26385 + * perfmon core must access PMU information ONLY through pfm_pmu_conf
26386 + * if pfm_pmu_conf is NULL, then no description is registered
26387 + */
26388 +struct pfm_pmu_config *pfm_pmu_conf;
26389 +EXPORT_SYMBOL(pfm_pmu_conf);
26390 +
26391 +static inline int pmu_is_module(struct pfm_pmu_config *c)
26392 +{
26393 + return !(c->flags & PFM_PMUFL_IS_BUILTIN);
26394 +}
26395 +/**
26396 + * pfm_pmu_regdesc_init -- initialize regdesc structure from PMU table
26397 + * @regs: the regdesc structure to initialize
26398 + * @excl_type: the register type(s) to exclude from this regdesc
26399 + * @unavail_pmcs: unavailable PMC registers
26400 + * @unavail_pmds: unavailable PMD registers
26401 + *
26402 + * Return:
26403 + * 0 success
26404 + * errno in case of error
26405 + */
26406 +static int pfm_pmu_regdesc_init(struct pfm_regdesc *regs, int excl_type,
26407 + u64 *unavail_pmcs, u64 *unavail_pmds)
26408 +{
26409 + struct pfm_regmap_desc *d;
26410 + u16 n, n2, n_counters, i;
26411 + int first_intr_pmd = -1, max1, max2, max3;
26412 +
26413 + /*
26414 + * compute the number of implemented PMC from the
26415 + * description table
26416 + */
26417 + n = 0;
26418 + max1 = max2 = -1;
26419 + d = pfm_pmu_conf->pmc_desc;
26420 + for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
26421 + if (!(d->type & PFM_REG_I))
26422 + continue;
26423 +
26424 + if (test_bit(i, cast_ulp(unavail_pmcs)))
26425 + continue;
26426 +
26427 + if (d->type & excl_type)
26428 + continue;
26429 +
26430 + __set_bit(i, cast_ulp(regs->pmcs));
26431 +
26432 + max1 = i;
26433 + n++;
26434 + }
26435 +
26436 + if (!n) {
26437 + PFM_INFO("%s PMU description has no PMC registers",
26438 + pfm_pmu_conf->pmu_name);
26439 + return -EINVAL;
26440 + }
26441 +
26442 + regs->max_pmc = max1 + 1;
26443 + regs->num_pmcs = n;
26444 +
26445 + n = n_counters = n2 = 0;
26446 + max1 = max2 = max3 = -1;
26447 + d = pfm_pmu_conf->pmd_desc;
26448 + for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) {
26449 + if (!(d->type & PFM_REG_I))
26450 + continue;
26451 +
26452 + if (test_bit(i, cast_ulp(unavail_pmds)))
26453 + continue;
26454 +
26455 + if (d->type & excl_type)
26456 + continue;
26457 +
26458 + __set_bit(i, cast_ulp(regs->pmds));
26459 + max1 = i;
26460 + n++;
26461 +
26462 + /*
26463 + * read-write registers
26464 + */
26465 + if (!(d->type & PFM_REG_RO)) {
26466 + __set_bit(i, cast_ulp(regs->rw_pmds));
26467 + max3 = i;
26468 + n2++;
26469 + }
26470 +
26471 + /*
26472 + * counter registers
26473 + */
26474 + if (d->type & PFM_REG_C64) {
26475 + __set_bit(i, cast_ulp(regs->cnt_pmds));
26476 + n_counters++;
26477 + }
26478 +
26479 + /*
26480 + * PMD with intr capabilities
26481 + */
26482 + if (d->type & PFM_REG_INTR) {
26483 + __set_bit(i, cast_ulp(regs->intr_pmds));
26484 + if (first_intr_pmd == -1)
26485 + first_intr_pmd = i;
26486 + max2 = i;
26487 + }
26488 + }
26489 +
26490 + if (!n) {
26491 + PFM_INFO("%s PMU description has no PMD registers",
26492 + pfm_pmu_conf->pmu_name);
26493 + return -EINVAL;
26494 + }
26495 +
26496 + regs->max_pmd = max1 + 1;
26497 + regs->first_intr_pmd = first_intr_pmd;
26498 + regs->max_intr_pmd = max2 + 1;
26499 +
26500 + regs->num_counters = n_counters;
26501 + regs->num_pmds = n;
26502 + regs->max_rw_pmd = max3 + 1;
26503 + regs->num_rw_pmd = n2;
26504 +
26505 + return 0;
26506 +}
26507 +
26508 +/**
26509 + * pfm_pmu_regdesc_init_all -- initialize all regdesc structures
26510 + * @una_pmcs : unavailable PMC registers
26511 + * @una_pmds : unavailable PMD registers
26512 + *
26513 + * Return:
26514 + * 0 success
26515 + * errno if error
26516 + *
26517 + * We maintain 3 regdesc:
26518 + * regs_all: all available registers
26519 + * regs_sys: registers available to system-wide contexts only
26520 + * regs_thr: registers available to per-thread contexts only
26521 + */
26522 +static int pfm_pmu_regdesc_init_all(u64 *una_pmcs, u64 *una_pmds)
26523 +{
26524 + int ret;
26525 +
26526 + memset(&pfm_pmu_conf->regs_all, 0, sizeof(struct pfm_regdesc));
26527 + memset(&pfm_pmu_conf->regs_thr, 0, sizeof(struct pfm_regdesc));
26528 + memset(&pfm_pmu_conf->regs_sys, 0, sizeof(struct pfm_regdesc));
26529 +
26530 + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_all,
26531 + 0,
26532 + una_pmcs, una_pmds);
26533 + if (ret)
26534 + return ret;
26535 +
26536 + PFM_DBG("regs_all.pmcs=0x%llx",
26537 + (unsigned long long)pfm_pmu_conf->regs_all.pmcs[0]);
26538 +
26539 + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_thr,
26540 + PFM_REG_SYS,
26541 + una_pmcs, una_pmds);
26542 + if (ret)
26543 + return ret;
26544 + PFM_DBG("regs.thr.pmcs=0x%llx",
26545 + (unsigned long long)pfm_pmu_conf->regs_thr.pmcs[0]);
26546 +
26547 + ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_sys,
26548 + PFM_REG_THR,
26549 + una_pmcs, una_pmds);
26550 +
26551 + PFM_DBG("regs_sys.pmcs=0x%llx",
26552 + (unsigned long long)pfm_pmu_conf->regs_sys.pmcs[0]);
26553 +
26554 + return ret;
26555 +}
26556 +
26557 +int pfm_pmu_register(struct pfm_pmu_config *cfg) /* 0 on success, -errno otherwise */
26558 +{
26559 + u16 i, nspec, nspec_ro, num_pmcs, num_pmds, num_wc = 0;
26560 + int type, ret = -EBUSY;
26561 +
26562 + if (perfmon_disabled) {
26563 + PFM_INFO("perfmon disabled, cannot add PMU description");
26564 + return -ENOSYS;
26565 + }
26566 +
26567 + nspec = nspec_ro = num_pmds = num_pmcs = 0;
26568 +
26569 + /* some sanity checks */
26570 + if (cfg == NULL || cfg->pmu_name == NULL) {
26571 + PFM_INFO("PMU config descriptor is invalid");
26572 + return -EINVAL;
26573 + }
26574 +
26575 + /* must have a probe */
26576 + if (cfg->probe_pmu == NULL) {
26577 + PFM_INFO("PMU config has no probe routine");
26578 + return -EINVAL;
26579 + }
26580 +
26581 + /*
26582 + * execute probe routine before anything else as it
26583 + * may update configuration tables
26584 + */
26585 + if ((*cfg->probe_pmu)() == -1) {
26586 + PFM_INFO("%s PMU detection failed", cfg->pmu_name);
26587 + return -EINVAL;
26588 + }
26589 +
26590 + if (!(cfg->flags & PFM_PMUFL_IS_BUILTIN) && cfg->owner == NULL) {
26591 + PFM_INFO("PMU config %s is missing owner", cfg->pmu_name);
26592 + return -EINVAL;
26593 + }
26594 +
26595 + if (!cfg->num_pmd_entries) {
26596 + PFM_INFO("%s needs to define num_pmd_entries", cfg->pmu_name);
26597 + return -EINVAL;
26598 + }
26599 +
26600 + if (!cfg->num_pmc_entries) {
26601 + PFM_INFO("%s needs to define num_pmc_entries", cfg->pmu_name);
26602 + return -EINVAL;
26603 + }
26604 +
26605 + if (!cfg->counter_width) {
26606 + PFM_INFO("PMU config %s, zero width counters", cfg->pmu_name);
26607 + return -EINVAL;
26608 + }
26609 +
26610 + /*
26611 + * REG_RO, REG_V not supported on PMC registers
26612 + */
26613 + for (i = 0; i < cfg->num_pmc_entries; i++) {
26614 +
26615 + type = cfg->pmc_desc[i].type;
26616 +
26617 + if (type & PFM_REG_I)
26618 + num_pmcs++;
26619 +
26620 + if (type & PFM_REG_WC)
26621 + num_wc++;
26622 +
26623 + if (type & PFM_REG_V) {
26624 + PFM_INFO("PFM_REG_V is not supported on "
26625 + "PMCs (PMC%d)", i);
26626 + return -EINVAL;
26627 + }
26628 + if (type & PFM_REG_RO) {
26629 + PFM_INFO("PFM_REG_RO meaningless on "
26630 + "PMCs (PMC%u)", i);
26631 + return -EINVAL;
26632 + }
26633 + }
26634 +
26635 + if (num_wc && cfg->pmc_write_check == NULL) {
26636 + PFM_INFO("some PMCs have write-checker but no callback provided\n");
26637 + return -EINVAL;
26638 + }
26639 +
26640 + /*
26641 + * check virtual PMD registers
26642 + */
26643 + num_wc = 0;
26644 + for (i = 0; i < cfg->num_pmd_entries; i++) {
26645 +
26646 + type = cfg->pmd_desc[i].type;
26647 +
26648 + if (type & PFM_REG_I)
26649 + num_pmds++;
26650 +
26651 + if (type & PFM_REG_V) {
26652 + nspec++;
26653 + if (type & PFM_REG_RO)
26654 + nspec_ro++;
26655 + }
26656 +
26657 + if (type & PFM_REG_WC)
26658 + num_wc++;
26659 + }
26660 +
26661 + if (num_wc && cfg->pmd_write_check == NULL) {
26662 + PFM_INFO("PMD have write-checker but no callback provided\n");
26663 + return -EINVAL;
26664 + }
26665 +
26666 + if (nspec && cfg->pmd_sread == NULL) {
26667 + PFM_INFO("PMU config is missing pmd_sread()");
26668 + return -EINVAL;
26669 + }
26670 +
26671 + nspec = nspec - nspec_ro; /* virtual PMDs that are also writable */
26672 + if (nspec && cfg->pmd_swrite == NULL) {
26673 + PFM_INFO("PMU config is missing pmd_swrite()");
26674 + return -EINVAL;
26675 + }
26676 +
26677 + if (num_pmcs >= PFM_MAX_PMCS) {
26678 + PFM_INFO("%s PMCS registers exceed name space [0-%u]",
26679 + cfg->pmu_name,
26680 + PFM_MAX_PMCS);
26681 + return -EINVAL;
26682 + }
26683 + if (num_pmds >= PFM_MAX_PMDS) {
26684 + PFM_INFO("%s PMDS registers exceed name space [0-%u]",
26685 + cfg->pmu_name,
26686 + PFM_MAX_PMDS);
26687 + return -EINVAL;
26688 + }
26689 + spin_lock(&pfm_pmu_conf_lock);
26690 +
26691 + if (pfm_pmu_conf) /* a description is already installed: ret is -EBUSY */
26692 + goto unlock;
26693 +
26694 + if (!cfg->version)
26695 + cfg->version = "0.0";
26696 +
26697 + pfm_pmu_conf = cfg;
26698 + pfm_pmu_conf->ovfl_mask = (1ULL << cfg->counter_width) - 1;
26699 +
26700 + ret = pfm_arch_pmu_config_init(cfg);
26701 + if (!ret)
26702 + ret = pfm_sysfs_add_pmu(pfm_pmu_conf);
26703 +
26704 + /* on any failure, do not leave a half-installed description behind */
26705 + if (ret)
26706 + pfm_pmu_conf = NULL;
26707 +
26708 +unlock:
26709 + spin_unlock(&pfm_pmu_conf_lock);
26710 +
26711 + if (ret) {
26712 + PFM_INFO("register %s PMU error %d", cfg->pmu_name, ret);
26713 + } else {
26714 + PFM_INFO("%s PMU installed", cfg->pmu_name);
26715 + /*
26716 + * (re)initialize PMU on each CPU now that we have a description
26717 + */
26718 + on_each_cpu(__pfm_init_percpu, cfg, 0);
26719 + }
26720 + return ret;
26721 +}
26722 +EXPORT_SYMBOL(pfm_pmu_register);
26723 +
26724 +/*
26725 + * remove PMU description. Caller must pass address of current
26726 + * configuration. This is mostly for sanity checking as only
26727 + * one config can exist at any time. Nothing is done when cfg is
26728 + * NULL or when no description is currently registered.
26729 + * We are using the module refcount mechanism to protect against
26730 + * removal while the configuration is being used. As long as there is
26731 + * one context, a PMU configuration cannot be removed. The protection is
26732 + * managed in module logic.
26733 + */
26734 +void pfm_pmu_unregister(struct pfm_pmu_config *cfg)
26735 +{
26736 + if (!cfg || !pfm_pmu_conf) /* both pointers are dereferenced below */
26737 + return;
26738 +
26739 + spin_lock(&pfm_pmu_conf_lock);
26740 +
26741 + BUG_ON(module_refcount(pfm_pmu_conf->owner));
26742 +
26743 + if (cfg->owner == pfm_pmu_conf->owner) {
26744 + pfm_sysfs_remove_pmu(pfm_pmu_conf);
26745 + pfm_pmu_conf = NULL;
26746 + }
26747 +
26748 + spin_unlock(&pfm_pmu_conf_lock);
26749 +}
26750 +EXPORT_SYMBOL(pfm_pmu_unregister);
26751 +
26752 +static int pfm_pmu_request_module(void)
26753 +{
26754 + char *mod_name;
26755 + int ret;
26756 +
26757 + mod_name = pfm_arch_get_pmu_module_name();
26758 + if (mod_name == NULL)
26759 + return -ENOSYS;
26760 +
26761 + ret = request_module(mod_name);
26762 +
26763 + PFM_DBG("mod=%s ret=%d\n", mod_name, ret);
26764 + return ret;
26765 +}
26766 +
26767 +/*
26768 + * autoload: 0 = do not try, not 0 = try to autoload the PMU description
26769 + * module when none is registered. On success, holds a module reference
26770 + * for modular descriptions. return: 0 on success, -ENOSYS otherwise
26771 + */
26772 +int pfm_pmu_conf_get(int autoload)
26773 +{
26774 + int ret;
26775 +
26776 + spin_lock(&pfm_pmu_conf_lock);
26777 +
26778 + if (request_mod_in_progress) {
26779 + ret = -ENOSYS;
26780 + goto skip;
26781 + }
26782 +
26783 + if (autoload && pfm_pmu_conf == NULL) {
26784 + /* lock is dropped around the load; the flag turns others away */
26785 + request_mod_in_progress = 1;
26786 +
26787 + spin_unlock(&pfm_pmu_conf_lock);
26788 +
26789 + pfm_pmu_request_module();
26790 +
26791 + spin_lock(&pfm_pmu_conf_lock);
26792 +
26793 + request_mod_in_progress = 0;
26794 +
26795 + /*
26796 + * request_module() may succeed but the module
26797 + * may not have registered properly so we need
26798 + * to check pfm_pmu_conf again below
26799 + */
26800 + }
26801 +
26802 + ret = pfm_pmu_conf == NULL ? -ENOSYS : 0;
26803 + if (!ret && pmu_is_module(pfm_pmu_conf)
26804 + && !try_module_get(pfm_pmu_conf->owner))
26805 + ret = -ENOSYS;
26806 +
26807 +skip:
26808 + spin_unlock(&pfm_pmu_conf_lock);
26809 +
26810 + return ret;
26811 +}
26812 +
26813 +void pfm_pmu_conf_put(void)
26814 +{
26815 + if (pfm_pmu_conf == NULL || !pmu_is_module(pfm_pmu_conf))
26816 + return;
26817 + /* drop the module reference (see try_module_get() in pfm_pmu_conf_get()) */
26818 + spin_lock(&pfm_pmu_conf_lock);
26819 + module_put(pfm_pmu_conf->owner);
26820 + spin_unlock(&pfm_pmu_conf_lock);
26821 +}
26822 +
26823 +
26824 +/*
26825 + * acquire PMU resource from lower-level PMU register allocator
26826 + * (currently perfctr-watchdog.c)
26827 + *
26828 + * acquisition is done when the first context is created (and not
26829 + * when it is loaded). We grab all that is defined in the description
26830 + * module and then we make adjustments at the arch-specific level.
26831 + *
26832 + * The PMU resource is released when the last perfmon context is destroyed.
26833 + * return: 0 on success, otherwise the error from pfm_arch_pmu_acquire()
26834 + *
26835 + * interrupts are not masked
26836 + */
26837 +int pfm_pmu_acquire(struct pfm_context *ctx)
26838 +{
26839 + u64 unavail_pmcs[PFM_PMC_BV];
26840 + u64 unavail_pmds[PFM_PMD_BV];
26841 + int ret = 0;
26842 +
26843 + spin_lock(&pfm_pmu_acq_lock);
26844 +
26845 + PFM_DBG("pmu_acquired=%u", pfm_pmu_acquired);
26846 +
26847 + pfm_pmu_acquired++;
26848 +
26849 + /*
26850 + * we need to initialize regdesc each time we re-acquire
26851 + * the PMU for the first time as there may have been changes
26852 + * in the list of available registers, e.g., NMI may have
26853 + * been disabled. Checking on PMU module insert is not
26854 + * enough
26855 + */
26856 + if (pfm_pmu_acquired == 1) {
26857 + memset(unavail_pmcs, 0, sizeof(unavail_pmcs));
26858 + memset(unavail_pmds, 0, sizeof(unavail_pmds));
26859 +
26860 + ret = pfm_arch_pmu_acquire(unavail_pmcs, unavail_pmds);
26861 + if (ret) {
26862 + pfm_pmu_acquired--;
26863 + } else {
26864 + pfm_pmu_regdesc_init_all(unavail_pmcs, unavail_pmds);
26865 +
26866 + /* available PMU resources */
26867 + PFM_DBG("PMU acquired: %u PMCs, %u PMDs, %u counters",
26868 + pfm_pmu_conf->regs_all.num_pmcs,
26869 + pfm_pmu_conf->regs_all.num_pmds,
26870 + pfm_pmu_conf->regs_all.num_counters);
26871 + }
26872 + }
26873 + spin_unlock(&pfm_pmu_acq_lock);
26874 +
26875 + /*
26876 + * copy the regdesc that corresponds to the context
26877 + * we copy and not just point because it helps with
26878 + * memory locality. the regdesc structure is accessed
26879 + * very frequently in performance critical code such
26880 + * as context switch and interrupt handling. By using
26881 + * a local copy, we increase memory footprint, but
26882 + * increase chance to have local memory access,
26883 + * especially for system-wide contexts.
26884 + */
26885 + if (ctx->flags.system)
26886 + ctx->regs = pfm_pmu_conf->regs_sys;
26887 + else
26888 + ctx->regs = pfm_pmu_conf->regs_thr;
26889 +
26890 + return ret;
26891 +}
26892 +
26893 +/*
26894 + * release the PMU resource
26895 + *
26896 + * actual release happens when the acquire count drops back to zero
26897 + *
26898 + * interrupts are not masked
26899 + */
26900 +void pfm_pmu_release(void)
26901 +{
26902 + BUG_ON(irqs_disabled());
26903 +
26904 + /*
26905 + * we need to use a spinlock because release takes some time
26906 + * and we may have a race with pfm_pmu_acquire()
26907 + */
26908 + spin_lock(&pfm_pmu_acq_lock);
26909 +
26910 + PFM_DBG("pmu_acquired=%d", pfm_pmu_acquired);
26911 +
26912 + /*
26913 + * we decouple test and decrement because if we had errors
26914 + * in pfm_pmu_acquire(), we still come here on pfm_context_free()
26915 + * but with pfm_pmu_acquired == 0
26916 + */
26917 + if (pfm_pmu_acquired > 0 && --pfm_pmu_acquired == 0) {
26918 + pfm_arch_pmu_release();
26919 + PFM_DBG("PMU released");
26920 + }
26921 + spin_unlock(&pfm_pmu_acq_lock);
26922 +}
26923 diff --git a/perfmon/perfmon_priv.h b/perfmon/perfmon_priv.h
26924 new file mode 100644
26925 index 0000000..5b485de
26926 --- /dev/null
26927 +++ b/perfmon/perfmon_priv.h
26928 @@ -0,0 +1,182 @@
26929 +/*
26930 + * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
26931 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
26932 + *
26933 + * This program is free software; you can redistribute it and/or
26934 + * modify it under the terms of version 2 of the GNU General Public
26935 + * License as published by the Free Software Foundation.
26936 + *
26937 + * This program is distributed in the hope that it will be useful,
26938 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
26939 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26940 + * General Public License for more details.
26941 + *
26942 + * You should have received a copy of the GNU General Public License
26943 + * along with this program; if not, write to the Free Software
26944 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26945 + * 02111-1307 USA
26946 + */
26947 +
26948 +#ifndef __PERFMON_PRIV_H__
26949 +#define __PERFMON_PRIV_H__
26950 +/*
26951 + * This file contains all the definitions of data structures, variables, macros
26952 + * that are private to the generic code, i.e., not shared with any code that
26953 + * lives under arch/ or include/asm-XX
26954 + *
26955 + * For shared definitions, use include/linux/perfmon_kern.h
26956 + */
26957 +
26958 +#ifdef CONFIG_PERFMON
26959 +
26960 +/*
26961 + * type of PMD reset for pfm_reset_pmds() or pfm_switch_sets*()
26962 + */
26963 +#define PFM_PMD_RESET_SHORT 1 /* use short reset value */
26964 +#define PFM_PMD_RESET_LONG 2 /* use long reset value */
26965 +
26966 +/*
26967 + * context lazy save/restore activation count
26968 + */
26969 +#define PFM_INVALID_ACTIVATION ((u64)~0)
26970 +
26971 +DECLARE_PER_CPU(u64, pmu_activation_number);
26972 +DECLARE_PER_CPU(struct hrtimer, pfm_hrtimer);
26973 +
26974 +static inline void pfm_set_pmu_owner(struct task_struct *task,
26975 + struct pfm_context *ctx)
26976 +{
26977 + __get_cpu_var(pmu_owner) = task; /* per-cpu: task owning the PMU */
26978 + __get_cpu_var(pmu_ctx) = ctx; /* per-cpu: context recorded with it */
26979 +}
26980 +
26981 +static inline int pfm_msgq_is_empty(struct pfm_context *ctx)
26982 +{
26983 + return ctx->msgq_head == ctx->msgq_tail; /* empty when head == tail */
26984 +}
26985 +
26986 +void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m);
26987 +int pfm_end_notify(struct pfm_context *ctx);
26988 +int pfm_ovfl_notify(struct pfm_context *ctx, struct pfm_event_set *set,
26989 + unsigned long ip);
26990 +
26991 +int pfm_alloc_fd(struct file **cfile);
26992 +
26993 +int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count);
26994 +int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req,
26995 + int count);
26996 +int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req,
26997 + int count);
26998 +
26999 +
27000 +int pfm_init_ctx(void);
27001 +
27002 +int pfm_pmu_acquire(struct pfm_context *ctx);
27003 +void pfm_pmu_release(void);
27004 +
27005 +int pfm_session_acquire(int is_system, u32 cpu);
27006 +void pfm_session_release(int is_system, u32 cpu);
27007 +
27008 +int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size);
27009 +int pfm_smpl_buf_load_context(struct pfm_context *ctx);
27010 +void pfm_smpl_buf_unload_context(struct pfm_context *ctx);
27011 +
27012 +int pfm_init_sysfs(void);
27013 +
27014 +#ifdef CONFIG_PERFMON_DEBUG_FS
27015 +int pfm_init_debugfs(void);
27016 +int pfm_debugfs_add_cpu(int mycpu);
27017 +void pfm_debugfs_del_cpu(int mycpu);
27018 +#else /* !CONFIG_PERFMON_DEBUG_FS: no-op stubs, the int ones return 0 */
27019 +static inline int pfm_init_debugfs(void)
27020 +{
27021 + return 0;
27022 +}
27023 +static inline int pfm_debugfs_add_cpu(int mycpu)
27024 +{
27025 + return 0;
27026 +}
27027 +
27028 +static inline void pfm_debugfs_del_cpu(int mycpu)
27029 +{}
27030 +#endif /* CONFIG_PERFMON_DEBUG_FS */
27031 +
27032 +
27033 +void pfm_reset_pmds(struct pfm_context *ctx, struct pfm_event_set *set,
27034 + int num_pmds,
27035 + int reset_mode);
27036 +
27037 +struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set);
27038 +int pfm_init_sets(void);
27039 +
27040 +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what);
27041 +
27042 +void pfm_free_sets(struct pfm_context *ctx);
27043 +int pfm_create_initial_set(struct pfm_context *ctx);
27044 +void pfm_switch_sets_from_intr(struct pfm_context *ctx);
27045 +void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set);
27046 +enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t);
27047 +
27048 +enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx,
27049 + struct pfm_event_set *new_set,
27050 + int reset_mode,
27051 + int no_restart);
27052 +
27053 +/**
27054 + * pfm_check_save_prev_ctx - check if previous context exists and save state
27055 + *
27056 + * called from pfm_load_ctx_thread() and __pfm_ctxsin_thread() to check if a
27057 + * previous context exists. If so, save its PMU state. Meant for UP kernels.
27058 + * NOTE(review): body is under #ifdef CONFIG_SMP, not #ifndef -- confirm.
27059 + *
27060 + * PMU ownership is not cleared because the function is always called while
27061 + * trying to install a new owner.
27062 + */
27063 +static inline void pfm_check_save_prev_ctx(void)
27064 +{
27065 +#ifdef CONFIG_SMP
27066 + struct pfm_event_set *set;
27067 + struct pfm_context *ctxp;
27068 +
27069 + ctxp = __get_cpu_var(pmu_ctx);
27070 + if (!ctxp)
27071 + return;
27072 + /*
27073 + * in UP per-thread, due to lazy save
27074 + * there could be a context from another
27075 + * task. We need to push it first before
27076 + * installing our new state
27077 + */
27078 + set = ctxp->active_set;
27079 + pfm_save_pmds(ctxp, set);
27080 + /*
27081 + * do not clear ownership because we rewrite
27082 + * right away
27083 + */
27084 +#endif
27085 +}
27086 +
27087 +
27088 +int pfm_init_fs(void);
27089 +
27090 +int pfm_init_hotplug(void);
27091 +
27092 +void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set);
27093 +void pfm_resume_after_ovfl(struct pfm_context *ctx);
27094 +int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg,
27095 + struct file *filp);
27096 +
27097 +static inline void pfm_post_work(struct task_struct *task,
27098 + struct pfm_context *ctx, int type)
27099 +{
27100 + ctx->flags.work_type = type; /* kind of work left pending in ctx */
27101 + set_tsk_thread_flag(task, TIF_PERFMON_WORK); /* flag task as having work */
27102 + pfm_arch_arm_handle_work(task); /* arch hook; semantics not visible here */
27103 +}
27104 +
27105 +#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
27106 +#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
27107 +
27108 +#endif /* CONFIG_PERFMON */
27109 +
27110 +#endif /* __PERFMON_PRIV_H__ */
27111 diff --git a/perfmon/perfmon_res.c b/perfmon/perfmon_res.c
27112 new file mode 100644
27113 index 0000000..7b0382b
27114 --- /dev/null
27115 +++ b/perfmon/perfmon_res.c
27116 @@ -0,0 +1,450 @@
27117 +/*
27118 + * perfmon_res.c: perfmon2 resource allocations
27119 + *
27120 + * This file implements the perfmon2 interface which
27121 + * provides access to the hardware performance counters
27122 + * of the host processor.
27123 + *
27124 + * The initial version of perfmon.c was written by
27125 + * Ganesh Venkitachalam, IBM Corp.
27126 + *
27127 + * Then it was modified for perfmon-1.x by Stephane Eranian and
27128 + * David Mosberger, Hewlett Packard Co.
27129 + *
27130 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
27131 + * by Stephane Eranian, Hewlett Packard Co.
27132 + *
27133 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
27134 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
27135 + * David Mosberger-Tang <davidm@hpl.hp.com>
27136 + *
27137 + * More information about perfmon available at:
27138 + * http://perfmon2.sf.net
27139 + *
27140 + * This program is free software; you can redistribute it and/or
27141 + * modify it under the terms of version 2 of the GNU General Public
27142 + * License as published by the Free Software Foundation.
27143 + *
27144 + * This program is distributed in the hope that it will be useful,
27145 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
27146 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27147 + * General Public License for more details.
27148 + *
27149 + * You should have received a copy of the GNU General Public License
27150 + * along with this program; if not, write to the Free Software
27151 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27152 + * 02111-1307 USA
27153 + */
27154 +#include <linux/kernel.h>
27155 +#include <linux/module.h>
27156 +#include <linux/perfmon_kern.h>
27157 +#include "perfmon_priv.h"
27158 +
27159 +/*
27160 + * global information about all sessions, mostly used to synchronize
27161 + * between system wide and per-process. Updated under pfm_res_lock.
27162 + */
27163 +struct pfm_resources {
27164 + size_t smpl_buf_mem_cur;/* current sampling buffer memory usage */
27165 + cpumask_t sys_cpumask; /* bitmask of cpus with a per-cpu session */
27166 + u32 thread_sessions; /* number of per-thread sessions */
27167 +};
27168 +
27169 +static struct pfm_resources pfm_res;
27170 +
27171 +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_res_lock);
27172 +
27173 +/**
27174 + * pfm_smpl_buf_space_acquire - check memory resource usage for sampling buffer
27175 + * @ctx: context of interest
27176 + * @size: size of requested buffer
27177 + *
27178 + * sampling buffer allocated by perfmon must be
27179 + * checked against max locked memory usage thresholds
27180 + * for security reasons.
27181 + *
27182 + * The first level check is against the system wide limit
27183 + * as indicated by the system administrator in /sys/kernel/perfmon
27184 + *
27185 + * The second level check is on a per-process basis using
27186 + * RLIMIT_MEMLOCK limit. Operating on the current task only.
27187 + *
27188 + * return: 0 on success, -ENOMEM if a limit is hit or current has no mm
27189 + */
27190 +int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size)
27191 +{
27192 + struct mm_struct *mm;
27193 + unsigned long locked;
27194 + unsigned long buf_mem, buf_mem_max;
27195 + unsigned long flags;
27196 +
27197 + spin_lock_irqsave(&pfm_res_lock, flags);
27198 +
27199 + /*
27200 + * check against global buffer limit; buf_mem < size means the sum wrapped
27201 + */
27202 + buf_mem_max = pfm_controls.smpl_buffer_mem_max;
27203 + buf_mem = pfm_res.smpl_buf_mem_cur + size;
27204 +
27205 + if (buf_mem >= size && buf_mem <= buf_mem_max) {
27206 + pfm_res.smpl_buf_mem_cur = buf_mem;
27207 +
27208 + PFM_DBG("buf_mem_max=%lu current_buf_mem=%lu",
27209 + buf_mem_max,
27210 + buf_mem);
27211 + }
27212 +
27213 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27214 +
27215 + if (buf_mem < size || buf_mem > buf_mem_max) {
27216 + PFM_DBG("smpl buffer memory threshold reached");
27217 + return -ENOMEM;
27218 + }
27219 +
27220 + /*
27221 + * check against per-process RLIMIT_MEMLOCK
27222 + */
27223 + mm = get_task_mm(current);
27224 + if (!mm)
27225 + goto unres;
27226 + down_write(&mm->mmap_sem);
27227 + locked = mm->locked_vm << PAGE_SHIFT;
27228 + locked += size;
27229 +
27230 + if (locked > current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) {
27231 +
27232 + PFM_DBG("RLIMIT_MEMLOCK reached ask_locked=%lu rlim_cur=%lu",
27233 + locked,
27234 + current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur);
27235 +
27236 + up_write(&mm->mmap_sem);
27237 + mmput(mm);
27238 + goto unres;
27239 + }
27240 +
27241 + mm->locked_vm = locked >> PAGE_SHIFT;
27242 +
27243 + up_write(&mm->mmap_sem);
27244 +
27245 + mmput(mm);
27246 +
27247 + return 0;
27248 +
27249 +unres:
27250 + /*
27251 + * remove global buffer memory allocation
27252 + */
27253 + spin_lock_irqsave(&pfm_res_lock, flags);
27254 +
27255 + pfm_res.smpl_buf_mem_cur -= size;
27256 +
27257 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27258 +
27259 + return -ENOMEM;
27260 +}
27261 +/**
27262 + * pfm_smpl_buf_space_release - release resource usage for sampling buffer
27263 + * @ctx: perfmon context of interest
27264 + * @size: size of the buffer being released
27265 + *
27266 + * There exist multiple paths leading to this function. We need to be very
27267 + * careful with locking on the mmap_sem as it may already be held by the
27268 + * time we come here. The following paths exist:
27269 + *
27270 + * exit path:
27271 + * sys_exit_group
27272 + * do_group_exit
27273 + * do_exit
27274 + * exit_mm
27275 + * mmput
27276 + * exit_mmap
27277 + * remove_vma
27278 + * fput
27279 + * __fput
27280 + * pfm_close
27281 + * __pfm_close
27282 + * pfm_context_free
27283 + * pfm_release_buf_space
27284 + * munmap path:
27285 + * sys_munmap
27286 + * do_munmap
27287 + * remove_vma
27288 + * fput
27289 + * __fput
27290 + * pfm_close
27291 + * __pfm_close
27292 + * pfm_context_free
27293 + * pfm_release_buf_space
27294 + *
27295 + * close path:
27296 + * sys_close
27297 + * filp_close
27298 + * fput
27299 + * __fput
27300 + * pfm_close
27301 + * __pfm_close
27302 + * pfm_context_free
27303 + * pfm_release_buf_space
27304 + *
27305 + * The issue is that on the munmap() path, the mmap_sem is already held
27306 + * in write-mode by the time we come here. To avoid the deadlock, we need
27307 + * to know where we are coming from and skip down_write(). It is fairly
27308 + * difficult to know this because of the lack of good hooks and
27309 + * the fact that there may not have been any mmap() of the sampling buffer
27310 + * (i.e. create_context() followed by close() or exit()).
27311 + *
27312 + * We use a flag, ctx->flags.mmap_nlock, which is toggled in the vm_ops
27313 + * callback in remove_vma() which is called systematically for the call, so
27314 + * on all but the pure close() path. The exit path does not already hold
27315 + * the lock but this is exit so there is no task->mm by the time we come here.
27316 + *
27317 + * The mmap_nlock is set only when unmapping and this is the LAST reference
27318 + * to the file (i.e., close() followed by munmap()).
27319 + */
27320 +void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size)
27321 +{
27322 + unsigned long flags;
27323 + struct mm_struct *mm;
27324 +
27325 + mm = get_task_mm(current);
27326 + if (mm) {
27327 + if (ctx->flags.mmap_nlock == 0) {
27328 + PFM_DBG("doing down_write");
27329 + down_write(&mm->mmap_sem);
27330 + }
27331 +
27332 + mm->locked_vm -= size >> PAGE_SHIFT;
27333 +
27334 + PFM_DBG("size=%zu locked_vm=%lu", size, mm->locked_vm);
27335 +
27336 + if (ctx->flags.mmap_nlock == 0)
27337 + up_write(&mm->mmap_sem);
27338 +
27339 + mmput(mm);
27340 + }
27341 + /* the global accounting is always updated, even without an mm */
27342 + spin_lock_irqsave(&pfm_res_lock, flags);
27343 +
27344 + pfm_res.smpl_buf_mem_cur -= size;
27345 +
27346 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27347 +}
27348 +
27349 +/**
27350 + * pfm_session_acquire - reserve a per-thread or per-cpu session
27351 + * @is_system: true if per-cpu (system-wide) session
27352 + * @cpu: cpu number for per-cpu session
27353 + *
27354 + * return:
27355 + * 0 : success
27356 + * -EBUSY: if a conflicting session exists
27357 + */
27358 +int pfm_session_acquire(int is_system, u32 cpu)
27359 +{
27360 + unsigned long flags;
27361 + u32 nsys_cpus;
27362 + int ret = 0;
27363 +
27364 + /*
27365 + * validity checks on cpu_mask have been done upstream
27366 + */
27367 + spin_lock_irqsave(&pfm_res_lock, flags);
27368 +
27369 + nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
27370 +
27371 + PFM_DBG("in sys=%u task=%u is_sys=%d cpu=%u",
27372 + nsys_cpus,
27373 + pfm_res.thread_sessions,
27374 + is_system,
27375 + cpu);
27376 +
27377 + if (is_system) {
27378 + /*
27379 + * cannot mix system wide and per-task sessions
27380 + */
27381 + if (pfm_res.thread_sessions > 0) {
27382 + PFM_DBG("%u conflicting thread_sessions",
27383 + pfm_res.thread_sessions);
27384 + ret = -EBUSY;
27385 + goto abort;
27386 + }
27387 +
27388 + if (cpu_isset(cpu, pfm_res.sys_cpumask)) {
27389 + PFM_DBG("conflicting session on CPU%u", cpu);
27390 + ret = -EBUSY;
27391 + goto abort;
27392 + }
27393 +
27394 + PFM_DBG("reserved session on CPU%u", cpu);
27395 +
27396 + cpu_set(cpu, pfm_res.sys_cpumask);
27397 + nsys_cpus++;
27398 + } else {
27399 + if (nsys_cpus) {
27400 + ret = -EBUSY;
27401 + goto abort;
27402 + }
27403 + pfm_res.thread_sessions++;
27404 + }
27405 +
27406 + PFM_DBG("out sys=%u task=%u is_sys=%d cpu=%u",
27407 + nsys_cpus,
27408 + pfm_res.thread_sessions,
27409 + is_system,
27410 + cpu);
27411 +
27412 +abort:
27413 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27414 +
27415 + return ret;
27416 +}
27417 +
27418 +/**
27419 + * pfm_session_release - release a per-cpu or per-thread session
27420 + * @is_system: true if per-cpu session
27421 + * @cpu: cpu number for per-cpu session
27422 + *
27423 + * called from __pfm_unload_context()
27424 + */
27425 +void pfm_session_release(int is_system, u32 cpu)
27426 +{
27427 + unsigned long flags;
27428 +
27429 + spin_lock_irqsave(&pfm_res_lock, flags);
27430 +
27431 + PFM_DBG("in sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u",
27432 + cpus_weight(pfm_res.sys_cpumask),
27433 + pfm_res.thread_sessions,
27434 + is_system, cpu);
27435 + /* undo the reservation made by pfm_session_acquire() */
27436 + if (is_system)
27437 + cpu_clear(cpu, pfm_res.sys_cpumask);
27438 + else
27439 + pfm_res.thread_sessions--;
27440 +
27441 + PFM_DBG("out sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u",
27442 + cpus_weight(pfm_res.sys_cpumask),
27443 + pfm_res.thread_sessions,
27444 + is_system, cpu);
27445 +
27446 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27447 +}
27448 +
27449 +/**
27450 + * pfm_session_allcpus_acquire - acquire per-cpu sessions on all online cpus
27451 + *
27452 + * currently used by Oprofile on X86. Returns 0, or -EBUSY if any session exists
27453 + */
27454 +int pfm_session_allcpus_acquire(void)
27455 +{
27456 + unsigned long flags;
27457 + u32 nsys_cpus, cpu;
27458 + int ret = -EBUSY;
27459 +
27460 + spin_lock_irqsave(&pfm_res_lock, flags);
27461 +
27462 + nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
27463 +
27464 + PFM_DBG("in sys=%u task=%u",
27465 + nsys_cpus,
27466 + pfm_res.thread_sessions);
27467 +
27468 + if (nsys_cpus) {
27469 + PFM_DBG("already some system-wide sessions");
27470 + goto abort;
27471 + }
27472 +
27473 + /*
27474 + * cannot mix system wide and per-task sessions
27475 + */
27476 + if (pfm_res.thread_sessions) {
27477 + PFM_DBG("%u conflicting thread_sessions",
27478 + pfm_res.thread_sessions);
27479 + goto abort;
27480 + }
27481 +
27482 + for_each_online_cpu(cpu) {
27483 + cpu_set(cpu, pfm_res.sys_cpumask);
27484 + nsys_cpus++;
27485 + }
27486 +
27487 + PFM_DBG("out sys=%u task=%u",
27488 + nsys_cpus,
27489 + pfm_res.thread_sessions);
27490 +
27491 + ret = 0;
27492 +abort:
27493 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27494 +
27495 + return ret;
27496 +}
27497 +EXPORT_SYMBOL(pfm_session_allcpus_acquire);
27498 +
27499 +/**
27500 + * pfm_session_allcpus_release - release per-cpu sessions on all online cpus
27501 + *
27502 + * currently used by Oprofile code
27503 + */
27504 +void pfm_session_allcpus_release(void)
27505 +{
27506 + unsigned long flags;
27507 + u32 nsys_cpus, cpu;
27508 +
27509 + spin_lock_irqsave(&pfm_res_lock, flags);
27510 +
27511 + nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
27512 +
27513 + PFM_DBG("in sys=%u task=%u",
27514 + nsys_cpus,
27515 + pfm_res.thread_sessions);
27516 +
27517 + /*
27518 + * XXX: could use __cpus_clear() with nbits
27519 + */
27520 + for_each_online_cpu(cpu) {
27521 + cpu_clear(cpu, pfm_res.sys_cpumask);
27522 + nsys_cpus--;
27523 + }
27524 +
27525 + PFM_DBG("out sys=%u task=%u",
27526 + nsys_cpus,
27527 + pfm_res.thread_sessions);
27528 +
27529 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27530 +}
27531 +EXPORT_SYMBOL(pfm_session_allcpus_release);
27532 +
27533 +/**
27534 + * pfm_sysfs_res_show - return current resource usage for sysfs
27535 + * @buf: buffer to hold string in return
27536 + * @sz: size of buf
27537 + * @what: what to produce (any other value leaves buf untouched)
27538 + * what=0 : thread_sessions
27539 + * what=1 : cpus_weight(sys_cpumask)
27540 + * what=2 : smpl_buf_mem_cur
27541 + * what=3 : pmu model name, "unknown" if no PMU description is registered
27542 + *
27543 + * called from perfmon_sysfs.c
27544 + * return number of bytes written into buf (up to sz)
27545 + */
27546 +ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what)
27547 +{
27548 + unsigned long flags;
27549 +
27550 + spin_lock_irqsave(&pfm_res_lock, flags);
27551 +
27552 + switch (what) {
27553 + case 0: snprintf(buf, sz, "%u\n", pfm_res.thread_sessions);
27554 + break;
27555 + case 1: snprintf(buf, sz, "%d\n", cpus_weight(pfm_res.sys_cpumask));
27556 + break;
27557 + case 2: snprintf(buf, sz, "%zu\n", pfm_res.smpl_buf_mem_cur);
27558 + break;
27559 + case 3: /* the format already ends the line: no "\n" in the fallback */
27560 + snprintf(buf, sz, "%s\n",
27561 + pfm_pmu_conf ? pfm_pmu_conf->pmu_name
27562 + : "unknown");
27563 + }
27564 + spin_unlock_irqrestore(&pfm_res_lock, flags);
27565 + return strlen(buf);
27566 +}
27567 diff --git a/perfmon/perfmon_rw.c b/perfmon/perfmon_rw.c
27568 new file mode 100644
27569 index 0000000..3168eb7
27570 --- /dev/null
27571 +++ b/perfmon/perfmon_rw.c
27572 @@ -0,0 +1,733 @@
27573 +/*
27574 + * perfmon.c: perfmon2 PMC/PMD read/write system calls
27575 + *
27576 + * This file implements the perfmon2 interface which
27577 + * provides access to the hardware performance counters
27578 + * of the host processor.
27579 + *
27580 + * The initial version of perfmon.c was written by
27581 + * Ganesh Venkitachalam, IBM Corp.
27582 + *
27583 + * Then it was modified for perfmon-1.x by Stephane Eranian and
27584 + * David Mosberger, Hewlett Packard Co.
27585 + *
27586 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
27587 + * by Stephane Eranian, Hewlett Packard Co.
27588 + *
27589 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
27590 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
27591 + * David Mosberger-Tang <davidm@hpl.hp.com>
27592 + *
27593 + * More information about perfmon available at:
27594 + * http://perfmon2.sf.net/
27595 + *
27596 + * This program is free software; you can redistribute it and/or
27597 + * modify it under the terms of version 2 of the GNU General Public
27598 + * License as published by the Free Software Foundation.
27599 + *
27600 + * This program is distributed in the hope that it will be useful,
27601 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
27602 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27603 + * General Public License for more details.
27604 + *
27605 + * You should have received a copy of the GNU General Public License
27606 + * along with this program; if not, write to the Free Software
27607 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27608 + * 02111-1307 USA
27609 + */
27610 +#include <linux/module.h>
27611 +#include <linux/kernel.h>
27612 +#include <linux/perfmon_kern.h>
27613 +#include "perfmon_priv.h"
27614 +
27615 +#define PFM_REGFL_PMC_ALL (PFM_REGFL_NO_EMUL64)
27616 +#define PFM_REGFL_PMD_ALL (PFM_REGFL_RANDOM|PFM_REGFL_OVFL_NOTIFY)
27617 +
27618 +/**
27619 + * update_used_reg -- update used_pmcs for a single PMD
27620 + * @set: set to update
27621 + * @cnum: new PMD to add
27622 + *
27623 + * ORs the PMCs PMD cnum depends on into set->used_pmcs (ctx gives max_pmc)
27624 + */
27625 +static inline void update_used_reg(struct pfm_context *ctx,
27626 + struct pfm_event_set *set, u16 cnum)
27627 +{
27628 + bitmap_or(cast_ulp(set->used_pmcs),
27629 + cast_ulp(set->used_pmcs),
27630 + cast_ulp(pfm_pmu_conf->pmd_desc[cnum].dep_pmcs),
27631 + ctx->regs.max_pmc);
27632 +}
27633 +
27634 +/**
27635 + * update_used_pmcs -- update used_pmcs bitmask
27636 + * @set: event set to update
27637 + * @bv: bitmask to inspect for new PMD registers
27638 + *
27639 + * This function updates the used_pmcs bitmask for
27640 + * the set using bv, a bitmask of pmds. For each pmd set in bv
27641 + * (below ctx->regs.max_pmd), its depending pmcs are added to used_pmcs.
27642 + */
27643 +static void update_used_pmcs(struct pfm_context *ctx,
27644 + struct pfm_event_set *set, unsigned long *bv)
27645 +{
27646 + u16 max_pmd;
27647 + int n, p, q;
27648 +
27649 + max_pmd = ctx->regs.max_pmd;
27650 + /* n = number of set bits left to visit; q walks them in order */
27651 + n = bitmap_weight(bv, max_pmd);
27652 + for(p = 0; n; n--, p = q+1) {
27653 + q = find_next_bit(bv, max_pmd, p);
27654 + update_used_reg(ctx, set, q);
27655 + }
27656 +}
27657 +
27658 +/**
27659 + * update_changes -- update nused_pmcs, nused_pmds, write newly touched pmcs
27660 + * @ctx: context to use
27661 + * @set: event set to use
27662 + * @old_used_pmcs: former used_pmc bitmask
27663 + *
27664 + * This function updates nused_pmcs and nused_pmds after the last modification
27665 + * to an event set. When new pmcs are used, then they must be initialized such
27666 + * that we do not pick up stale values from another session.
27667 + * Returns 0, or the first error from __pfm_write_pmcs().
27668 + */
27669 +static inline int update_changes(struct pfm_context *ctx, struct pfm_event_set *set,
27670 + unsigned long *old_used_pmcs)
27671 +{
27672 + struct pfarg_pmc req;
27673 + u16 max_pmc, max_pmd;
27674 + int n, p, q, ret = 0;
27675 +
27676 + max_pmd = ctx->regs.max_pmd;
27677 + max_pmc = ctx->regs.max_pmc;
27678 +
27679 + /*
27680 + * update used counts
27681 + */
27682 + set->nused_pmds = bitmap_weight(cast_ulp(set->used_pmds), max_pmd);
27683 + set->nused_pmcs = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc);
27684 +
27685 + PFM_DBG("set%u u_pmds=0x%llx nu_pmds=%u u_pmcs=0x%llx nu_pmcs=%u",
27686 + set->id,
27687 + (unsigned long long)set->used_pmds[0],
27688 + set->nused_pmds,
27689 + (unsigned long long)set->used_pmcs[0],
27690 + set->nused_pmcs);
27691 +
27692 + memset(&req, 0, sizeof(req));
27693 + /* write back every pmc that is used now but was not in old_used_pmcs */
27694 + n = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc);
27695 + for(p = 0; n; n--, p = q+1) {
27696 + q = find_next_bit(cast_ulp(set->used_pmcs), max_pmc, p);
27697 +
27698 + if (test_bit(q, cast_ulp(old_used_pmcs)))
27699 + continue;
27700 +
27701 + req.reg_num = q;
27702 + req.reg_value = set->pmcs[q];
27703 +
27704 + ret = __pfm_write_pmcs(ctx, &req, 1);
27705 + if (ret)
27706 + break;
27707 + }
27708 + return ret;
27709 +}
27710 +
27711 +/**
27712 + * handle_smpl_bv - checks sampling bitmasks for new PMDs
27713 + * @ctx: context to use
27714 + * @set: set to use
27715 + * @bv: sampling bitmask
27716 + *
27717 + * scans the smpl bitmask for new PMDs (not yet used); __pfm_write_pmds() is
27718 + * invoked on each to initialize and mark it used. Returns 0 or first error.
27719 + */
27720 +static int handle_smpl_bv(struct pfm_context *ctx, struct pfm_event_set *set,
27721 + unsigned long *bv)
27722 +{
27723 + struct pfarg_pmd req;
27724 + int p, q, n, ret = 0;
27725 + u16 max_pmd;
27726 +
27727 + memset(&req, 0, sizeof(req));
27728 +
27729 + max_pmd = ctx->regs.max_pmd;
27730 +
27731 + n = bitmap_weight(cast_ulp(bv), max_pmd);
27732 +
27733 + for(p = 0; n; n--, p = q+1) {
27734 + q = find_next_bit(cast_ulp(bv), max_pmd, p);
27735 +
27736 + if (test_bit(q, cast_ulp(set->used_pmds)))
27737 + continue;
27738 + /* PMD q is not used yet: write it with a value of 0 */
27739 + req.reg_num = q;
27740 + req.reg_value = 0;
27741 +
27742 + ret = __pfm_write_pmds(ctx, &req, 1, 0);
27743 + if (ret)
27744 + break;
27745 + }
27746 + return ret;
27747 +}
27748 +
27749 +/**
27750 + * is_invalid -- check if register index is out of range or unimplemented
27751 + * @cnum: register index
27752 + * @impl: bitmask of implemented registers
27753 + * @max: highest implemented register + 1
27754 + *
27755 + * return:
27756 + * 0 if register index is valid
27757 + * 1 if invalid (cnum >= max, or its bit is not set in impl)
27758 + */
27759 +static inline int is_invalid(u16 cnum, unsigned long *impl, u16 max)
27760 +{
27761 + return cnum >= max || !test_bit(cnum, impl);
27762 +}
27763 +
27764 +/**
27765 + * __pfm_write_pmds - modify data registers
27766 + * @ctx: context to operate on
27767 + * @req: pfarg_pmd_t request from user
27768 + * @count: number of element in the pfarg_pmd_t vector
27769 + * @compat: used only on IA-64 to maintain backward compatibility with v2.0
27770 + *
27771 + * The function succeeds whether the context is attached or not.
27772 + * When attached to another thread, that thread must be stopped.
27773 + * The context is locked and interrupts are disabled.
27774 + * Returns 0, or stops at the first bad request with -EINVAL/a helper's error.
27775 + */
27776 +int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
27777 + int compat)
27778 +{
27779 + struct pfm_event_set *set, *new_set, *active_set;
27780 + u64 old_used_pmcs[PFM_PMC_BV];
27781 + unsigned long *smpl_pmds, *reset_pmds, *impl_pmds, *impl_rw_pmds;
27782 + u32 req_flags, flags;
27783 + u16 cnum, pmd_type, max_pmd;
27784 + u16 set_id;
27785 + int i, can_access_pmu;
27786 + int ret;
27787 + pfm_pmd_check_t wr_func;
27788 +
27789 + active_set = ctx->active_set;
27790 + max_pmd = ctx->regs.max_pmd;
27791 + impl_pmds = cast_ulp(ctx->regs.pmds);
27792 + impl_rw_pmds = cast_ulp(ctx->regs.rw_pmds);
27793 + wr_func = pfm_pmu_conf->pmd_write_check;
27794 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
27795 +
27796 + can_access_pmu = 0;
27797 +
27798 + /*
27799 + * we cannot access the actual PMD registers when monitoring is masked
27800 + */
27801 + if (unlikely(ctx->state == PFM_CTX_LOADED))
27802 + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
27803 + || ctx->flags.system;
27804 +
27805 + bitmap_copy(cast_ulp(old_used_pmcs),
27806 + cast_ulp(set->used_pmcs),
27807 + ctx->regs.max_pmc);
27808 +
27809 + ret = -EINVAL;
27810 + for (i = 0; i < count; i++, req++) {
27811 + ret = -EINVAL; /* helpers may have left ret at 0 in the previous iteration */
27812 + cnum = req->reg_num;
27813 + set_id = req->reg_set;
27814 + req_flags = req->reg_flags;
27815 + smpl_pmds = cast_ulp(req->reg_smpl_pmds);
27816 + reset_pmds = cast_ulp(req->reg_reset_pmds);
27817 + flags = 0;
27818 +
27819 + /*
27820 + * cannot write to unimplemented registers
27821 + * writes to read-only register are ignored
27822 + */
27823 + if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) {
27824 + PFM_DBG("pmd%u is not available", cnum);
27825 + goto error;
27826 + }
27827 +
27828 + pmd_type = pfm_pmu_conf->pmd_desc[cnum].type;
27829 +
27830 + /*
27831 + * ensure only valid flags are set
27832 + */
27833 + if (req_flags & ~(PFM_REGFL_PMD_ALL)) {
27834 + PFM_DBG("pmd%u: invalid flags=0x%x",
27835 + cnum, req_flags);
27836 + goto error;
27837 + }
27838 +
27839 + /*
27840 + * OVFL_NOTIFY is valid for all types of PMD.
27841 + * non counting PMD may trigger PMU interrupt
27842 + * and thus may trigger recording of a sample.
27843 + * This is true with IBS on AMD family 16.
27844 + */
27845 + if (req_flags & PFM_REGFL_OVFL_NOTIFY)
27846 + flags |= PFM_REGFL_OVFL_NOTIFY;
27847 +
27848 + /*
27849 + * We allow randomization to non counting PMD
27850 + */
27851 + if (req_flags & PFM_REGFL_RANDOM)
27852 + flags |= PFM_REGFL_RANDOM;
27853 +
27854 + /*
27855 + * verify validity of smpl_pmds
27856 + */
27857 + if (unlikely(!bitmap_subset(smpl_pmds, impl_pmds, PFM_MAX_PMDS))) {
27858 + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u",
27859 + (unsigned long long)req->reg_smpl_pmds[0],
27860 + cnum);
27861 + goto error;
27862 + }
27863 +
27864 + /*
27865 + * verify validity of reset_pmds
27866 + * check against impl_rw_pmds because it is not
27867 + * possible to reset read-only PMDs
27868 + */
27869 + if (unlikely(!bitmap_subset(reset_pmds, impl_rw_pmds, PFM_MAX_PMDS))) {
27870 + PFM_DBG("invalid reset_pmds=0x%llx for pmd%u",
27871 + (unsigned long long)req->reg_reset_pmds[0],
27872 + cnum);
27873 + goto error;
27874 + }
27875 +
27876 + /*
27877 + * locate event set
27878 + */
27879 + if (set_id != set->id) {
27880 + /* update number of used register for previous set */
27881 + if (i) {
27882 + ret = update_changes(ctx, set, cast_ulp(old_used_pmcs));
27883 + if (ret)
27884 + goto error;
27885 + }
27886 +
27887 + new_set = pfm_find_set(ctx, set_id, 0);
27888 + if (new_set == NULL) {
27889 + PFM_DBG("event set%u does not exist", set_id);
27890 + ret = -EINVAL; /* update_changes() above may have zeroed ret */
27891 + goto error;
27892 + }
27893 + set = new_set; /* keep the old set until the lookup succeeds: error path uses set */
27894 + bitmap_copy(cast_ulp(old_used_pmcs), cast_ulp(set->used_pmcs),
27895 + ctx->regs.max_pmc);
27896 + }
27897 +
27898 + /*
27899 + * execute write checker, if any
27900 + */
27901 + if (unlikely(wr_func && (pmd_type & PFM_REG_WC))) {
27902 + ret = (*wr_func)(ctx, set, req);
27903 + if (ret)
27904 + goto error;
27905 +
27906 + }
27907 +
27908 +
27909 + /*
27910 + * now commit changes to software state
27911 + */
27912 +
27913 + if (unlikely(compat))
27914 + goto skip_set;
27915 +
27916 + if (bitmap_weight(smpl_pmds, max_pmd)) {
27917 + ret = handle_smpl_bv(ctx, set, smpl_pmds);
27918 + if (ret)
27919 + goto error;
27920 + update_used_pmcs(ctx, set, cast_ulp(smpl_pmds));
27921 + }
27922 +
27923 + bitmap_copy(cast_ulp(set->pmds[cnum].smpl_pmds),
27924 + smpl_pmds,
27925 + max_pmd);
27926 +
27927 +
27928 + if (bitmap_weight(reset_pmds, max_pmd)) {
27929 + ret = handle_smpl_bv(ctx, set, reset_pmds);
27930 + if (ret)
27931 + goto error;
27932 + update_used_pmcs(ctx, set, cast_ulp(reset_pmds));
27933 + }
27934 +
27935 + bitmap_copy(cast_ulp(set->pmds[cnum].reset_pmds),
27936 + reset_pmds,
27937 + max_pmd);
27938 +
27939 + set->pmds[cnum].flags = flags;
27940 +
27941 + __set_bit(cnum, cast_ulp(set->used_pmds));
27942 + update_used_reg(ctx, set, cnum);
27943 +
27944 + /*
27945 + * we reprogram the PMD hence, we clear any pending
27946 + * ovfl. Does affect ovfl switch on restart but new
27947 + * value has already been established here
27948 + */
27949 + if (test_bit(cnum, cast_ulp(set->povfl_pmds))) {
27950 + set->npend_ovfls--;
27951 + __clear_bit(cnum, cast_ulp(set->povfl_pmds));
27952 + }
27953 + __clear_bit(cnum, cast_ulp(set->ovfl_pmds));
27954 +
27955 + /*
27956 + * update ovfl_notify
27957 + */
27958 + if (flags & PFM_REGFL_OVFL_NOTIFY)
27959 + __set_bit(cnum, cast_ulp(set->ovfl_notify));
27960 + else
27961 + __clear_bit(cnum, cast_ulp(set->ovfl_notify));
27962 +
27963 + /*
27964 + * establish new switch count
27965 + */
27966 + set->pmds[cnum].ovflsw_thres = req->reg_ovfl_switch_cnt;
27967 + set->pmds[cnum].ovflsw_ref_thres = req->reg_ovfl_switch_cnt;
27968 +skip_set:
27969 +
27970 + /*
27971 + * set last value to new value for all types of PMD
27972 + */
27973 + set->pmds[cnum].lval = req->reg_value;
27974 + set->pmds[cnum].value = req->reg_value;
27975 +
27976 + /*
27977 + * update reset values (not just for counters)
27978 + */
27979 + set->pmds[cnum].long_reset = req->reg_long_reset;
27980 + set->pmds[cnum].short_reset = req->reg_short_reset;
27981 +
27982 + /*
27983 + * update randomization mask
27984 + */
27985 + set->pmds[cnum].mask = req->reg_random_mask;
27986 +
27987 + set->pmds[cnum].eventid = req->reg_smpl_eventid;
27988 +
27989 + if (set == active_set) {
27990 + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMDS;
27991 + if (can_access_pmu)
27992 + pfm_write_pmd(ctx, cnum, req->reg_value);
27993 + }
27994 +
27995 +
27996 + PFM_DBG("set%u pmd%u=0x%llx flags=0x%x a_pmu=%d "
27997 + "ctx_pmd=0x%llx s_reset=0x%llx "
27998 + "l_reset=0x%llx s_pmds=0x%llx "
27999 + "r_pmds=0x%llx o_pmds=0x%llx "
28000 + "o_thres=%llu compat=%d eventid=%llx",
28001 + set->id,
28002 + cnum,
28003 + (unsigned long long)req->reg_value,
28004 + set->pmds[cnum].flags,
28005 + can_access_pmu,
28006 + (unsigned long long)set->pmds[cnum].value,
28007 + (unsigned long long)set->pmds[cnum].short_reset,
28008 + (unsigned long long)set->pmds[cnum].long_reset,
28009 + (unsigned long long)set->pmds[cnum].smpl_pmds[0],
28010 + (unsigned long long)set->pmds[cnum].reset_pmds[0],
28011 + (unsigned long long)set->ovfl_pmds[0],
28012 + (unsigned long long)set->pmds[cnum].ovflsw_thres,
28013 + compat,
28014 + (unsigned long long)set->pmds[cnum].eventid);
28015 + }
28016 + ret = 0;
28017 +
28018 +error:
28019 + update_changes(ctx, set, cast_ulp(old_used_pmcs)); /* result deliberately not folded into ret */
28020 +
28021 + /*
28022 + * make changes visible
28023 + */
28024 + if (can_access_pmu)
28025 + pfm_arch_serialize();
28026 +
28027 + return ret;
28028 +}
28029 +
28030 +/**
28031 + * __pfm_write_pmcs - modify config registers
28032 + * @ctx: context to operate on
28033 + * @req: pfarg_pmc_t request from user
28034 + * @count: number of element in the pfarg_pmc_t vector
28035 + *
28036 + * Returns 0, or stops at the first bad request with -EINVAL/the checker's error.
28037 + * The function succeeds whether the context is attached or not.
28038 + * When attached to another thread, that thread must be stopped.
28039 + *
28040 + * The context is locked and interrupts are disabled.
28041 + */
28042 +int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, int count)
28043 +{
28044 + struct pfm_event_set *set, *active_set;
28045 + u64 value, dfl_val, rsvd_msk;
28046 + unsigned long *impl_pmcs;
28047 + int i, can_access_pmu;
28048 + int ret;
28049 + u16 set_id;
28050 + u16 cnum, pmc_type, max_pmc;
28051 + u32 flags, expert;
28052 + pfm_pmc_check_t wr_func;
28053 +
28054 + active_set = ctx->active_set;
28055 +
28056 + wr_func = pfm_pmu_conf->pmc_write_check;
28057 + max_pmc = ctx->regs.max_pmc;
28058 + impl_pmcs = cast_ulp(ctx->regs.pmcs);
28059 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28060 +
28061 + expert = pfm_controls.flags & PFM_CTRL_FL_RW_EXPERT;
28062 +
28063 + can_access_pmu = 0;
28064 +
28065 + /*
28066 + * we cannot access the actual PMC registers when monitoring is masked
28067 + */
28068 + if (unlikely(ctx->state == PFM_CTX_LOADED))
28069 + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
28070 + || ctx->flags.system;
28071 +
28072 + ret = -EINVAL;
28073 +
28074 + for (i = 0; i < count; i++, req++) {
28075 + ret = -EINVAL; /* the write checker may have left ret at 0 in the previous iteration */
28076 + cnum = req->reg_num;
28077 + set_id = req->reg_set;
28078 + value = req->reg_value;
28079 + flags = req->reg_flags;
28080 +
28081 + /*
28082 + * no access to unavailable PMC register
28083 + */
28084 + if (unlikely(is_invalid(cnum, impl_pmcs, max_pmc))) {
28085 + PFM_DBG("pmc%u is not available", cnum);
28086 + goto error;
28087 + }
28088 +
28089 + pmc_type = pfm_pmu_conf->pmc_desc[cnum].type;
28090 + dfl_val = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
28091 + rsvd_msk = pfm_pmu_conf->pmc_desc[cnum].rsvd_msk;
28092 +
28093 + /*
28094 + * ensure only valid flags are set
28095 + */
28096 + if (flags & ~PFM_REGFL_PMC_ALL) {
28097 + PFM_DBG("pmc%u: invalid flags=0x%x", cnum, flags);
28098 + goto error;
28099 + }
28100 +
28101 + /*
28102 + * locate event set
28103 + */
28104 + if (set_id != set->id) {
28105 + set = pfm_find_set(ctx, set_id, 0);
28106 + if (set == NULL) {
28107 + PFM_DBG("event set%u does not exist",
28108 + set_id);
28109 + goto error;
28110 + }
28111 + }
28112 +
28113 + /*
28114 + * set reserved bits to default values
28115 + * (reserved bits must be 1 in rsvd_msk)
28116 + *
28117 + * bypass via /sys/kernel/perfmon/mode = 1
28118 + */
28119 + if (likely(!expert))
28120 + value = (value & ~rsvd_msk) | (dfl_val & rsvd_msk);
28121 +
28122 + if (flags & PFM_REGFL_NO_EMUL64) {
28123 + if (!(pmc_type & PFM_REG_NO64)) {
28124 + PFM_DBG("pmc%u no support for "
28125 + "PFM_REGFL_NO_EMUL64", cnum);
28126 + goto error;
28127 + }
28128 + value &= ~pfm_pmu_conf->pmc_desc[cnum].no_emul64_msk;
28129 + }
28130 +
28131 + /*
28132 + * execute write checker, if any
28133 + */
28134 + if (likely(wr_func && (pmc_type & PFM_REG_WC))) {
28135 + req->reg_value = value; /* checker sees, and may adjust, the sanitized value */
28136 + ret = (*wr_func)(ctx, set, req);
28137 + if (ret)
28138 + goto error;
28139 + value = req->reg_value;
28140 + }
28141 +
28142 + /*
28143 + * Now we commit the changes
28144 + */
28145 +
28146 + /*
28147 + * mark PMC register as used
28148 + * We do not track associated PMC register based on
28149 + * the fact that they will likely need to be written
28150 + * in order to become useful at which point the statement
28151 + * below will catch that.
28152 + *
28153 + * The used_pmcs bitmask is only useful on architectures where
28154 + * the PMC needs to be modified for particular bits, especially
28155 + * on overflow or to stop/start.
28156 + */
28157 + if (!test_bit(cnum, cast_ulp(set->used_pmcs))) {
28158 + __set_bit(cnum, cast_ulp(set->used_pmcs));
28159 + set->nused_pmcs++;
28160 + }
28161 +
28162 + set->pmcs[cnum] = value;
28163 +
28164 + if (set == active_set) {
28165 + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
28166 + if (can_access_pmu)
28167 + pfm_arch_write_pmc(ctx, cnum, value);
28168 + }
28169 +
28170 + PFM_DBG("set%u pmc%u=0x%llx a_pmu=%d "
28171 + "u_pmcs=0x%llx nu_pmcs=%u",
28172 + set->id,
28173 + cnum,
28174 + (unsigned long long)value,
28175 + can_access_pmu,
28176 + (unsigned long long)set->used_pmcs[0],
28177 + set->nused_pmcs);
28178 + }
28179 + ret = 0;
28180 +error:
28181 + /*
28182 + * make sure the changes are visible
28183 + */
28184 + if (can_access_pmu)
28185 + pfm_arch_serialize();
28186 +
28187 + return ret;
28188 +}
28189 +
28190 +/**
28191 + * __pfm_read_pmds - read data registers
28192 + * @ctx: context to operate on
28193 + * @req: pfarg_pmd_t request from user
28194 + * @count: number of element in the pfarg_pmd_t vector
28195 + *
28196 + * Returns 0 when all entries were filled in, -EINVAL at the first bad one.
28197 + * The function succeeds whether the context is attached or not.
28198 + * When attached to another thread, that thread must be stopped.
28199 + *
28200 + * The context is locked and interrupts are disabled.
28201 + */
28202 +int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count)
28203 +{
28204 + u64 val = 0, lval, ovfl_mask, hw_val;
28205 + u64 sw_cnt;
28206 + unsigned long *impl_pmds;
28207 + struct pfm_event_set *set, *active_set;
28208 + int i, ret, can_access_pmu = 0;
28209 + u16 cnum, pmd_type, set_id, max_pmd;
28210 +
28211 + ovfl_mask = pfm_pmu_conf->ovfl_mask;
28212 + impl_pmds = cast_ulp(ctx->regs.pmds);
28213 + max_pmd = ctx->regs.max_pmd;
28214 + active_set = ctx->active_set;
28215 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28216 +
28217 + if (likely(ctx->state == PFM_CTX_LOADED)) {
28218 + can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
28219 + || ctx->flags.system;
28220 +
28221 + if (can_access_pmu)
28222 + pfm_arch_serialize();
28223 + }
28224 +
28225 + /*
28226 + * on both UP and SMP, we can only read the PMD from the hardware
28227 + * register when the task is the owner of the local PMU.
28228 + */
28229 + ret = -EINVAL;
28230 + for (i = 0; i < count; i++, req++) {
28231 +
28232 + cnum = req->reg_num;
28233 + set_id = req->reg_set;
28234 +
28235 + if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) {
28236 + PFM_DBG("pmd%u is not implemented/unaccessible", cnum);
28237 + goto error;
28238 + }
28239 +
28240 + pmd_type = pfm_pmu_conf->pmd_desc[cnum].type;
28241 +
28242 + /*
28243 + * locate event set (the previous entry's set is reused when ids match)
28244 + */
28245 + if (set_id != set->id) {
28246 + set = pfm_find_set(ctx, set_id, 0);
28247 + if (set == NULL) {
28248 + PFM_DBG("event set%u does not exist",
28249 + set_id);
28250 + goto error;
28251 + }
28252 + }
28253 + /*
28254 + * it is not possible to read a PMD which was not requested:
28255 + * - explicitly written via pfm_write_pmds()
28256 + * - provided as a reg_smpl_pmds[] to another PMD during
28257 + * pfm_write_pmds()
28258 + *
28259 + * This is motivated by security and for optimization purposes:
28260 + * - on context switch restore, we can restore only what
28261 + * we use (except when regs directly readable at user
28262 + * level, e.g., IA-64 self-monitoring, I386 RDPMC).
28263 + * - do not need to maintain PMC -> PMD dependencies
28264 + */
28265 + if (unlikely(!test_bit(cnum, cast_ulp(set->used_pmds)))) {
28266 + PFM_DBG("pmd%u cannot read, because not used", cnum);
28267 + goto error;
28268 + }
28269 +
28270 + val = set->pmds[cnum].value;
28271 + lval = set->pmds[cnum].lval;
28272 +
28273 + /*
28274 + * extract remaining ovfl to switch
28275 + */
28276 + sw_cnt = set->pmds[cnum].ovflsw_thres;
28277 +
28278 + /*
28279 + * If the task is not the current one, then we check if the
28280 + * PMU state is still in the local live register due to lazy
28281 + * ctxsw. If true, then we read directly from the registers.
28282 + */
28283 + if (set == active_set && can_access_pmu) {
28284 + hw_val = pfm_read_pmd(ctx, cnum);
28285 + if (pmd_type & PFM_REG_C64) /* bits inside ovfl_mask from hardware, the rest from the software value */
28286 + val = (val & ~ovfl_mask) | (hw_val & ovfl_mask);
28287 + else
28288 + val = hw_val;
28289 + }
28290 +
28291 + PFM_DBG("set%u pmd%u=0x%llx sw_thr=%llu lval=0x%llx",
28292 + set->id,
28293 + cnum,
28294 + (unsigned long long)val,
28295 + (unsigned long long)sw_cnt,
28296 + (unsigned long long)lval);
28297 +
28298 + req->reg_value = val;
28299 + req->reg_last_reset_val = lval;
28300 + req->reg_ovfl_switch_cnt = sw_cnt;
28301 + }
28302 + ret = 0;
28303 +error:
28304 + return ret;
28305 +}
28306 diff --git a/perfmon/perfmon_sets.c b/perfmon/perfmon_sets.c
28307 new file mode 100644
28308 index 0000000..24534cb
28309 --- /dev/null
28310 +++ b/perfmon/perfmon_sets.c
28311 @@ -0,0 +1,873 @@
28312 +/*
28313 + * perfmon_sets.c: perfmon2 event sets and multiplexing functions
28314 + *
28315 + * This file implements the perfmon2 interface which
28316 + * provides access to the hardware performance counters
28317 + * of the host processor.
28318 + *
28319 + * The initial version of perfmon.c was written by
28320 + * Ganesh Venkitachalam, IBM Corp.
28321 + *
28322 + * Then it was modified for perfmon-1.x by Stephane Eranian and
28323 + * David Mosberger, Hewlett Packard Co.
28324 + *
28325 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
28326 + * by Stephane Eranian, Hewlett Packard Co.
28327 + *
28328 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
28329 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
28330 + * David Mosberger-Tang <davidm@hpl.hp.com>
28331 + *
28332 + * More information about perfmon available at:
28333 + * http://perfmon2.sf.net
28334 + *
28335 + * This program is free software; you can redistribute it and/or
28336 + * modify it under the terms of version 2 of the GNU General Public
28337 + * License as published by the Free Software Foundation.
28338 + *
28339 + * This program is distributed in the hope that it will be useful,
28340 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
28341 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28342 + * General Public License for more details.
28343 + *
28344 + * You should have received a copy of the GNU General Public License
28345 + * along with this program; if not, write to the Free Software
28346 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28347 + * 02111-1307 USA
28348 + */
28349 +#include <linux/kernel.h>
28350 +#include <linux/perfmon_kern.h>
28351 +#include "perfmon_priv.h"
28352 +
28353 +static struct kmem_cache *pfm_set_cachep; /* NOTE(review): presumably the slab cache for struct pfm_event_set; users are outside this chunk - confirm */
28354 +
28355 +/**
28356 + * pfm_reload_switch_thresholds - reload overflow-based switch thresholds per set
28357 + * @ctx: context giving the range of interrupting PMDs to scan
28358 + * @set: the set for which to reload thresholds
28359 + */
28360 +static void pfm_reload_switch_thresholds(struct pfm_context *ctx,
28361 + struct pfm_event_set *set)
28362 +{
28363 + u64 *used_pmds;
28364 + u16 i, max, first;
28365 +
28366 + used_pmds = set->used_pmds;
28367 + first = ctx->regs.first_intr_pmd;
28368 + max = ctx->regs.max_intr_pmd;
28369 +
28370 + for (i = first; i < max; i++) {
28371 + if (test_bit(i, cast_ulp(used_pmds))) { /* only PMDs marked used in this set */
28372 + set->pmds[i].ovflsw_thres = set->pmds[i].ovflsw_ref_thres;
28373 +
28374 + PFM_DBG("set%u pmd%u ovflsw_thres=%llu",
28375 + set->id,
28376 + i,
28377 + (unsigned long long)set->pmds[i].ovflsw_thres);
28378 + }
28379 + }
28380 +}
28381 +
28382 +/**
28383 + * pfm_prepare_sets - initialize sets on pfm_load_context
28384 + * @ctx : context to operate on
28385 + * @load_set: set to activate first
28386 + *
28387 + * reset overflow state of every set; returns set @load_set or NULL if not found
28388 + */
28389 +struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set)
28390 +{
28391 + struct pfm_event_set *set, *p;
28392 + u16 max;
28393 +
28394 + /*
28395 + * locate first set to activate
28396 + */
28397 + set = pfm_find_set(ctx, load_set, 0);
28398 + if (!set)
28399 + return NULL;
28400 +
28401 + if (set->flags & PFM_SETFL_OVFL_SWITCH)
28402 + pfm_reload_switch_thresholds(ctx, set);
28403 +
28404 + max = ctx->regs.max_intr_pmd;
28405 +
28406 + list_for_each_entry(p, &ctx->set_list, list) {
28407 + /*
28408 + * cleanup bitvectors (first max_intr_pmd bits only)
28409 + */
28410 + bitmap_zero(cast_ulp(p->ovfl_pmds), max);
28411 + bitmap_zero(cast_ulp(p->povfl_pmds), max);
28412 +
28413 + p->npend_ovfls = 0;
28414 +
28415 + /*
28416 + * we cannot just use plain clear because of arch-specific flags
28417 + */
28418 + p->priv_flags &= ~(PFM_SETFL_PRIV_MOD_BOTH|PFM_SETFL_PRIV_SWITCH);
28419 + /*
28420 + * neither duration nor runs are reset because typically loading/unloading
28421 + * does not mean counts are reset. To reset, the set must be modified
28422 + */
28423 + }
28424 + return set;
28425 +}
28426 +
28427 +/*
28428 + * called by hrtimer_interrupt()
28429 + *
28430 + * This is the only function where we come with
28431 + * cpu_base->lock held before ctx->lock
28432 + *
28433 + * interrupts are disabled
28434 + */
28435 +enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t)
28436 +{
28437 + struct pfm_event_set *set;
28438 + struct pfm_context *ctx;
28439 + unsigned long flags;
28440 + enum hrtimer_restart ret = HRTIMER_NORESTART; /* unless pfm_switch_sets() says otherwise */
28441 +
28442 + /*
28443 + * protect against race with unload
28444 + */
28445 + ctx = __get_cpu_var(pmu_ctx);
28446 + if (!ctx)
28447 + return HRTIMER_NORESTART;
28448 +
28449 + spin_lock_irqsave(&ctx->lock, flags);
28450 +
28451 + set = ctx->active_set; /* NOTE(review): set is not read again in this function */
28452 +
28453 + /*
28454 + * switching occurs only when context is attached
28455 + */
28456 + if (ctx->state != PFM_CTX_LOADED)
28457 + goto done;
28458 + /*
28459 + * timer does not run while monitoring is inactive (not started)
28460 + */
28461 + if (!pfm_arch_is_active(ctx))
28462 + goto done;
28463 +
28464 + pfm_stats_inc(handle_timeout_count);
28465 +
28466 + ret = pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_SHORT, 0); /* NULL: go to the next set in the list */
28467 +done:
28468 + spin_unlock_irqrestore(&ctx->lock, flags);
28469 + return ret;
28470 +}
28471 +
28472 +/*
28473 + * pfm_switch_sets: make another event set the active set of ctx
28474 + * always operating on the current task
28475 + * interrupts are masked
28476 + *
28477 + * input:
28478 + * - new_set: new set to switch to, if NULL follow normal chain
28479 + */
28480 +enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx,
28481 + struct pfm_event_set *new_set,
28482 + int reset_mode, /* handed to pfm_reset_pmds() */
28483 + int no_restart) /* non-zero: skip restart and timer re-arm */
28484 +{
28485 + struct pfm_event_set *set;
28486 + u64 now, end;
28487 + u32 new_flags;
28488 + int is_system, is_active, nn;
28489 + enum hrtimer_restart ret = HRTIMER_NORESTART; /* HRTIMER_RESTART only if the timer is forwarded below */
28490 +
28491 + now = sched_clock();
28492 + set = ctx->active_set;
28493 + is_active = pfm_arch_is_active(ctx);
28494 +
28495 + /*
28496 + * if no set is explicitly requested,
28497 + * use the set following the active one in ctx->set_list
28498 + */
28499 + if (!new_set) {
28500 + /*
28501 + * we use round-robin unless the user specified
28502 + * a particular set to go to.
28503 + */
28504 + new_set = list_first_entry(&set->list, struct pfm_event_set, list);
28505 + if (&new_set->list == &ctx->set_list) /* hit the list head: wrap to the first set */
28506 + new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28507 + }
28508 +
28509 + PFM_DBG_ovfl("state=%d act=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u "
28510 + "next_runs=%llu new_npend=%d reset_mode=%d reset_pmds=%llx",
28511 + ctx->state,
28512 + is_active,
28513 + set->id,
28514 + (unsigned long long)set->runs,
28515 + set->npend_ovfls,
28516 + new_set->id,
28517 + (unsigned long long)new_set->runs,
28518 + new_set->npend_ovfls,
28519 + reset_mode,
28520 + (unsigned long long)new_set->reset_pmds[0]);
28521 +
28522 + is_system = ctx->flags.system;
28523 + new_flags = new_set->flags;
28524 +
28525 + /*
28526 + * nothing more to do
28527 + */
28528 + if (new_set == set)
28529 + goto skip_same_set;
28530 +
28531 + if (is_active) {
28532 + pfm_arch_stop(current, ctx);
28533 + pfm_save_pmds(ctx, set);
28534 + /*
28535 + * compute elapsed ns for active set
28536 + */
28537 + set->duration += now - set->duration_start;
28538 + }
28539 +
28540 + pfm_arch_restore_pmds(ctx, new_set);
28541 + /*
28542 + * if masked, we must restore the pmcs such that they
28543 + * do not capture anything.
28544 + */
28545 + pfm_arch_restore_pmcs(ctx, new_set);
28546 +
28547 + if (new_set->npend_ovfls) {
28548 + pfm_arch_resend_irq(ctx);
28549 + pfm_stats_inc(ovfl_intr_replay_count);
28550 + }
28551 +
28552 + new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
28553 +
28554 +skip_same_set:
28555 + new_set->runs++;
28556 + /*
28557 + * reset switch threshold
28558 + */
28559 + if (new_flags & PFM_SETFL_OVFL_SWITCH)
28560 + pfm_reload_switch_thresholds(ctx, new_set);
28561 +
28562 + /*
28563 + * reset overflowed PMD registers in new set
28564 + */
28565 + nn = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd);
28566 + if (nn)
28567 + pfm_reset_pmds(ctx, new_set, nn, reset_mode);
28568 +
28569 +
28570 + /*
28571 + * This is needed when coming from pfm_start()
28572 + *
28573 + * When switching to the same set, there is no
28574 + * need to restart
28575 + */
28576 + if (no_restart)
28577 + goto skip_restart;
28578 +
28579 + if (is_active) {
28580 + /*
28581 + * do not need to restart when same set
28582 + */
28583 + if (new_set != set) {
28584 + ctx->active_set = new_set;
28585 + new_set->duration_start = now;
28586 + pfm_arch_start(current, ctx);
28587 + }
28588 + /*
28589 + * install new timeout if necessary
28590 + */
28591 + if (new_flags & PFM_SETFL_TIME_SWITCH) {
28592 + struct hrtimer *h;
28593 + h = &__get_cpu_var(pfm_hrtimer);
28594 + hrtimer_forward(h, h->base->get_time(), new_set->hrtimer_exp);
28595 + new_set->hrtimer_rem = new_set->hrtimer_exp;
28596 + ret = HRTIMER_RESTART;
28597 + }
28598 + }
28599 +
28600 +skip_restart:
28601 + ctx->active_set = new_set;
28602 +
28603 + end = sched_clock();
28604 +
28605 + pfm_stats_inc(set_switch_count);
28606 + pfm_stats_add(set_switch_ns, end - now);
28607 +
28608 + return ret;
28609 +}
28610 +
28611 +/*
28612 + * called from __pfm_overflow_handler() to switch event sets.
28613 + * monitoring is stopped, task is current, interrupts are masked.
28614 + * compared to pfm_switch_sets(), this version is simplified because
28615 + * it knows about the call path. There is no need to stop monitoring
28616 + * because it is already frozen by PMU handler.
28617 + */
28618 +void pfm_switch_sets_from_intr(struct pfm_context *ctx)
28619 +{
28620 + struct pfm_event_set *set, *new_set;
28621 + u64 now, end;
28622 + u32 new_flags;
28623 + int is_system, n;
28624 +
28625 + now = sched_clock();
28626 + set = ctx->active_set;
28627 + new_set = list_first_entry(&set->list, struct pfm_event_set, list); /* entry after set */
28628 + if (&new_set->list == &ctx->set_list) /* hit the list head: wrap to the first set */
28629 + new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28630 +
28631 + PFM_DBG_ovfl("state=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u "
28632 + "next_runs=%llu new_npend=%d new_r_pmds=%llx",
28633 + ctx->state,
28634 + set->id,
28635 + (unsigned long long)set->runs,
28636 + set->npend_ovfls,
28637 + new_set->id,
28638 + (unsigned long long)new_set->runs,
28639 + new_set->npend_ovfls,
28640 + (unsigned long long)new_set->reset_pmds[0]);
28641 +
28642 + is_system = ctx->flags.system;
28643 + new_flags = new_set->flags;
28644 +
28645 + /*
28646 + * nothing more to do
28647 + */
28648 + if (new_set == set)
28649 + goto skip_same_set;
28650 +
28651 + /*
28652 + * switch on intr only when set has OVFL_SWITCH
28653 + */
28654 + BUG_ON(set->flags & PFM_SETFL_TIME_SWITCH);
28655 +
28656 + /*
28657 + * when called from PMU intr handler, monitoring
28658 + * is already stopped
28659 + *
28660 + * save current PMD registers, we use a special
28661 + * form for performance reason. On some architectures,
28662 + * such as x86, the pmds are already saved when entering
28663 + * the PMU interrupt handler via pfm-arch_intr_freeze()
28664 + * so we don't need to save them again. On the contrary,
28665 + * on IA-64, they are not saved by freeze, thus we have
28666 + * to do it here.
28667 + */
28668 + pfm_arch_save_pmds_from_intr(ctx, set);
28669 +
28670 + /*
28671 + * compute elapsed ns for active set
28672 + */
28673 + set->duration += now - set->duration_start;
28674 +
28675 + pfm_arch_restore_pmds(ctx, new_set);
28676 +
28677 + /*
28678 + * must not be restored active as we are still executing in the
28679 + * PMU interrupt handler. activation is deferred to unfreeze PMU
28680 + */
28681 + pfm_arch_restore_pmcs(ctx, new_set);
28682 +
28683 + /*
28684 + * check for pending interrupt on incoming set.
28685 + * interrupts are masked so handler call deferred
28686 + */
28687 + if (new_set->npend_ovfls) {
28688 + pfm_arch_resend_irq(ctx);
28689 + pfm_stats_inc(ovfl_intr_replay_count);
28690 + }
28691 + /*
28692 + * no need to restore anything, that is already done
28693 + */
28694 + new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
28695 + /*
28696 + * reset duration counter
28697 + */
28698 + new_set->duration_start = now;
28699 +
28700 +skip_same_set:
28701 + new_set->runs++;
28702 +
28703 + /*
28704 + * reset switch threshold
28705 + */
28706 + if (new_flags & PFM_SETFL_OVFL_SWITCH)
28707 + pfm_reload_switch_thresholds(ctx, new_set);
28708 +
28709 + /*
28710 + * reset overflowed PMD registers
28711 + */
28712 + n = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd);
28713 + if (n)
28714 + pfm_reset_pmds(ctx, new_set, n, PFM_PMD_RESET_SHORT);
28715 +
28716 + /*
28717 + * XXX: isactive?
28718 + *
28719 + * Came here following a interrupt which triggered a switch, i.e.,
28720 + * previous set was using OVFL_SWITCH, thus we just need to
28721 + * check if the next set is using timeout, and if so arm the timer
28722 + * with the timeout of the next set (new_set), not of the old one.
28723 + * Timeout is always at least one tick away. No risk of having to
28724 + * invoke the timeout handler right now. In any case, cb_mode is
28725 + * set to HRTIMER_CB_IRQSAFE_NO_SOFTIRQ such that hrtimer_start
28726 + * will not try to wakeup the softirqd which could cause a locking
28727 + * problem.
28728 + */
28729 + if (new_flags & PFM_SETFL_TIME_SWITCH) {
28730 + hrtimer_start(&__get_cpu_var(pfm_hrtimer), new_set->hrtimer_exp, HRTIMER_MODE_REL);
28731 + PFM_DBG("armed new timeout for set%u", new_set->id);
28732 + }
28733 +
28734 + ctx->active_set = new_set;
28735 +
28736 + end = sched_clock();
28737 +
28738 + pfm_stats_inc(set_switch_count);
28739 + pfm_stats_add(set_switch_ns, end - now);
28740 +}
28741 +
28742 +/* returns the arch check's error if any, -EINVAL if both switch modes are requested, else 0 */
28743 +static int pfm_setfl_sane(struct pfm_context *ctx, u32 flags)
28744 +{
28745 +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH)
28746 + int ret;
28747 +
28748 + ret = pfm_arch_setfl_sane(ctx, flags); /* arch-specific flag validation first */
28749 + if (ret)
28750 + return ret;
28751 +
28752 + if ((flags & PFM_SETFL_BOTH_SWITCH) == PFM_SETFL_BOTH_SWITCH) {
28753 + PFM_DBG("both switch ovfl and switch time are set");
28754 + return -EINVAL;
28755 + }
28756 + return 0;
28757 +}
28758 +
28759 +/*
28760 + * it is never possible to change the identification of an existing set
28761 + */
28762 +static int pfm_change_evtset(struct pfm_context *ctx,
28763 + struct pfm_event_set *set,
28764 + struct pfarg_setdesc *req)
28765 +{
28766 + struct timeval tv;
28767 + struct timespec ts;
28768 + ktime_t kt;
28769 + long d, res_ns;
28770 + s32 rem;
28771 + u32 flags;
28772 + int ret;
28773 + u16 set_id;
28774 +
28775 + BUG_ON(ctx->state == PFM_CTX_LOADED);
28776 +
28777 + set_id = req->set_id;
28778 + flags = req->set_flags;
28779 +
28780 + ret = pfm_setfl_sane(ctx, flags);
28781 + if (ret) {
28782 + PFM_DBG("invalid flags 0x%x set %u", flags, set_id);
28783 + return -EINVAL;
28784 + }
28785 +
28786 + /*
28787 + * compute timeout value
28788 + */
28789 + if (flags & PFM_SETFL_TIME_SWITCH) {
28790 + /*
28791 + * timeout value of zero is illegal
28792 + */
28793 + if (req->set_timeout == 0) {
28794 + PFM_DBG("invalid timeout 0");
28795 + return -EINVAL;
28796 + }
28797 +
28798 + hrtimer_get_res(CLOCK_MONOTONIC, &ts);
28799 + res_ns = (long)ktime_to_ns(timespec_to_ktime(ts));
28800 +
28801 + /*
28802 + * round-up to multiple of clock resolution
28803 + * timeout = ((req->set_timeout+res_ns-1)/res_ns)*res_ns;
28804 + *
28805 + * u64 division missing on 32-bit arch, so use div_s64_rem
28806 + */
28807 + d = div_s64_rem(req->set_timeout, res_ns, &rem);
28808 +
28809 + PFM_DBG("set%u flags=0x%x req_timeout=%lluns "
28810 + "HZ=%u TICK_NSEC=%lu clock_res=%ldns rem=%dns",
28811 + set_id,
28812 + flags,
28813 + (unsigned long long)req->set_timeout,
28814 + HZ, TICK_NSEC,
28815 + res_ns,
28816 + rem);
28817 +
28818 + /*
28819 + * Only accept a timeout we can actually achieve.
28820 + * users can invoke clock_getres(CLOCK_MONOTONIC)
28821 + * to figure out resolution and adjust timeout
28822 + */
28823 + if (rem) {
28824 + PFM_DBG("set%u invalid timeout=%llu",
28825 + set_id,
28826 + (unsigned long long)req->set_timeout);
28827 + return -EINVAL;
28828 + }
28829 +
28830 + tv = ns_to_timeval(req->set_timeout);
28831 + kt = timeval_to_ktime(tv);
28832 + set->hrtimer_exp = kt;
28833 + } else {
28834 + set->hrtimer_exp = ktime_set(0, 0);
28835 + }
28836 +
28837 + /*
28838 + * commit changes
28839 + */
28840 + set->id = set_id;
28841 + set->flags = flags;
28842 + set->priv_flags = 0;
28843 +
28844 + /*
28845 + * activation and duration counters are reset as
28846 + * most likely major things will change in the set
28847 + */
28848 + set->runs = 0;
28849 + set->duration = 0;
28850 +
28851 + return 0;
28852 +}
28853 +
28854 +/*
28855 + * this function does not modify the next field
28856 + */
28857 +static void pfm_initialize_set(struct pfm_context *ctx,
28858 + struct pfm_event_set *set)
28859 +{
28860 + u64 *impl_pmcs;
28861 + u16 i, max_pmc;
28862 +
28863 + max_pmc = ctx->regs.max_pmc;
28864 + impl_pmcs = ctx->regs.pmcs;
28865 +
28866 + /*
28867 + * install default values for all PMC registers
28868 + */
28869 + for (i = 0; i < max_pmc; i++) {
28870 + if (test_bit(i, cast_ulp(impl_pmcs))) {
28871 + set->pmcs[i] = pfm_pmu_conf->pmc_desc[i].dfl_val;
28872 + PFM_DBG("set%u pmc%u=0x%llx",
28873 + set->id,
28874 + i,
28875 + (unsigned long long)set->pmcs[i]);
28876 + }
28877 + }
28878 +
28879 + /*
28880 + * PMD registers are set to 0 when the event set is allocated,
28881 + * hence we do not need to explicitly initialize them.
28882 + *
28883 + * For virtual PMD registers (i.e., those tied to a SW resource)
28884 + * their value becomes meaningful once the context is attached.
28885 + */
28886 +}
28887 +
28888 +/*
28889 + * look for an event set using its identification. If the set does not
28890 + * exist:
28891 + * - if alloc == 0 then return error
28892 + * - if alloc == 1 then allocate set
28893 + *
28894 + * alloc is one ONLY when coming from pfm_create_evtsets() which can only
28895 + * be called when the context is detached, i.e. monitoring is stopped.
28896 + */
28897 +struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, int alloc)
28898 +{
28899 + struct pfm_event_set *set = NULL, *prev, *new_set;
28900 +
28901 + PFM_DBG("looking for set=%u", set_id);
28902 +
28903 + prev = NULL;
28904 + list_for_each_entry(set, &ctx->set_list, list) {
28905 + if (set->id == set_id)
28906 + return set;
28907 + if (set->id > set_id)
28908 + break;
28909 + prev = set;
28910 + }
28911 +
28912 + if (!alloc)
28913 + return NULL;
28914 +
28915 + /*
28916 + * we are holding the context spinlock and interrupts
28917 + * are unmasked. We must use GFP_ATOMIC as we cannot
28918 + * sleep while holding a spin lock.
28919 + */
28920 + new_set = kmem_cache_zalloc(pfm_set_cachep, GFP_ATOMIC);
28921 + if (!new_set)
28922 + return NULL;
28923 +
28924 + new_set->id = set_id;
28925 +
28926 + INIT_LIST_HEAD(&new_set->list);
28927 +
28928 + if (prev == NULL) {
28929 + list_add(&(new_set->list), &ctx->set_list);
28930 + } else {
28931 + PFM_DBG("add after set=%u", prev->id);
28932 + list_add(&(new_set->list), &prev->list);
28933 + }
28934 + return new_set;
28935 +}
28936 +
28937 +/**
28938 + * pfm_create_initial_set - create initial set from __pfm_create_context
28939 + * @ctx: context to attach the set to
28940 + */
28941 +int pfm_create_initial_set(struct pfm_context *ctx)
28942 +{
28943 + struct pfm_event_set *set;
28944 +
28945 + /*
28946 + * create initial set0
28947 + */
28948 + if (!pfm_find_set(ctx, 0, 1))
28949 + return -ENOMEM;
28950 +
28951 + set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
28952 +
28953 + pfm_initialize_set(ctx, set);
28954 +
28955 + return 0;
28956 +}
28957 +
28958 +/*
28959 + * context is unloaded for this command. Interrupts are enabled
28960 + */
28961 +int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req,
28962 + int count)
28963 +{
28964 + struct pfm_event_set *set;
28965 + u16 set_id;
28966 + int i, ret;
28967 +
28968 + for (i = 0; i < count; i++, req++) {
28969 + set_id = req->set_id;
28970 +
28971 + PFM_DBG("set_id=%u", set_id);
28972 +
28973 + set = pfm_find_set(ctx, set_id, 1);
28974 + if (set == NULL)
28975 + goto error_mem;
28976 +
28977 + ret = pfm_change_evtset(ctx, set, req);
28978 + if (ret)
28979 + goto error_params;
28980 +
28981 + pfm_initialize_set(ctx, set);
28982 + }
28983 + return 0;
28984 +error_mem:
28985 + PFM_DBG("cannot allocate set %u", set_id);
28986 + return -ENOMEM;
28987 +error_params:
28988 + return ret;
28989 +}
28990 +
28991 +/*
28992 + * report flags, leftover timeout, runs, duration and register bitmasks
28993 + * for each set described in req[]. Stops with -EINVAL at the first id
28994 + * which matches no existing set; entries before it are already filled in.
28995 + */
28996 +int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req,
28997 + int count)
28998 +{
28999 + struct pfm_event_set *set;
29000 + int i, is_system, is_loaded, is_self, ret;
29001 + u16 set_id;
29002 + u64 end;
29003 +
29004 + end = sched_clock();
29005 +
29006 + is_system = ctx->flags.system;
29007 + is_loaded = ctx->state == PFM_CTX_LOADED;
29008 + is_self = ctx->task == current || is_system;
29009 +
29010 + ret = -EINVAL;
29011 + for (i = 0; i < count; i++, req++) {
29012 +
29013 + set_id = req->set_id;
29014 +
29015 + list_for_each_entry(set, &ctx->set_list, list) {
29016 + if (set->id == set_id)
29017 + goto found;
29018 + if (set->id > set_id)
29019 + break;
29020 + }
29021 + /* miss: set is not a valid entry once the walk is over */
29022 + goto error;
29023 +found:
29024 + req->set_flags = set->flags;
29025 +
29026 + /*
29027 + * compute leftover timeout
29028 + *
29029 + * lockdep may complain about lock inversion
29030 + * because of get_remaining() however, this
29031 + * applies to self-monitoring only, thus the
29032 + * thread cannot be in the timeout handler
29033 + * and here at the same time given that we
29034 + * run with interrupts disabled
29035 + */
29036 + if (is_loaded && is_self) {
29037 + struct hrtimer *h;
29038 + h = &__get_cpu_var(pfm_hrtimer);
29039 + req->set_timeout = ktime_to_ns(hrtimer_get_remaining(h));
29040 + } else {
29041 + /* hrtimer_rem is zero when not using timeout-based switching */
29042 + req->set_timeout = ktime_to_ns(set->hrtimer_rem);
29043 + }
29044 +
29045 + req->set_runs = set->runs;
29046 + req->set_act_duration = set->duration;
29047 +
29048 + /* adjust for active set if needed */
29049 + if (is_system && is_loaded && ctx->flags.started
29050 + && set == ctx->active_set)
29051 + req->set_act_duration += end - set->duration_start;
29052 +
29053 + /* copy the list of pmds which last overflowed */
29054 + bitmap_copy(cast_ulp(req->set_ovfl_pmds),
29055 + cast_ulp(set->ovfl_pmds),
29056 + PFM_MAX_PMDS);
29057 +
29058 + /*
29059 + * copy bitmask of available PMU registers
29060 + *
29061 + * must copy over the entire vector to avoid
29062 + * returning bogus upper bits passed by user
29063 + */
29064 + bitmap_copy(cast_ulp(req->set_avail_pmcs),
29065 + cast_ulp(ctx->regs.pmcs),
29066 + PFM_MAX_PMCS);
29067 +
29068 + bitmap_copy(cast_ulp(req->set_avail_pmds),
29069 + cast_ulp(ctx->regs.pmds),
29070 + PFM_MAX_PMDS);
29071 +
29072 + PFM_DBG("set%u flags=0x%x eff_usec=%llu runs=%llu "
29073 + "a_pmcs=0x%llx a_pmds=0x%llx",
29074 + set_id,
29075 + set->flags,
29076 + (unsigned long long)req->set_timeout,
29077 + (unsigned long long)set->runs,
29078 + (unsigned long long)ctx->regs.pmcs[0],
29079 + (unsigned long long)ctx->regs.pmds[0]);
29080 + }
29081 + ret = 0;
29082 +error:
29083 + return ret;
29084 +}
29085 +
29086 +/*
29087 + * context is unloaded for this command. Interrupts are enabled
29088 + */
29089 +int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count)
29090 +{
29091 + struct pfarg_setdesc *req = arg;
29092 + struct pfm_event_set *set;
29093 + u16 set_id;
29094 + int i, ret;
29095 +
29096 + ret = -EINVAL;
29097 + for (i = 0; i < count; i++, req++) {
29098 + set_id = req->set_id;
29099 +
29100 + list_for_each_entry(set, &ctx->set_list, list) {
29101 + if (set->id == set_id)
29102 + goto found;
29103 + if (set->id > set_id)
29104 + goto error;
29105 + }
29106 + goto error;
29107 +found:
29108 + /*
29109 + * clear active set if necessary.
29110 + * will be updated when context is loaded
29111 + */
29112 + if (set == ctx->active_set)
29113 + ctx->active_set = NULL;
29114 +
29115 + list_del(&set->list);
29116 +
29117 + kmem_cache_free(pfm_set_cachep, set);
29118 +
29119 + PFM_DBG("set%u deleted", set_id);
29120 + }
29121 + ret = 0;
29122 +error:
29123 + return ret;
29124 +}
29125 +
29126 +/*
29127 + * called from pfm_context_free() to free all sets
29128 + */
29129 +void pfm_free_sets(struct pfm_context *ctx)
29130 +{
29131 + struct pfm_event_set *set, *tmp;
29132 +
29133 + list_for_each_entry_safe(set, tmp, &ctx->set_list, list) {
29134 + list_del(&set->list);
29135 + kmem_cache_free(pfm_set_cachep, set);
29136 + }
29137 +}
29138 +
29139 +/**
29140 + * pfm_restart_timer - restart hrtimer taking care of expired timeout
29141 + * @ctx : context to work with
29142 + * @set : current active set
29143 + *
29144 + * Must be called on the processor on which the timer is to be armed.
29145 + * Assumes context is locked and interrupts are masked
29146 + *
29147 + * Upon return the active set for the context may have changed
29148 + */
29149 +void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set)
29150 +{
29151 + struct hrtimer *h;
29152 + enum hrtimer_restart ret;
29153 +
29154 + h = &__get_cpu_var(pfm_hrtimer);
29155 +
29156 + PFM_DBG_ovfl("hrtimer=%lld", (long long)ktime_to_ns(set->hrtimer_rem));
29157 +
29158 + if (ktime_to_ns(set->hrtimer_rem) > 0) {
29159 + hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL);
29160 + } else {
29161 + /*
29162 + * timer was not re-armed because it has already expired
29163 + * timer was not enqueued, we need to switch set now
29164 + */
29165 + pfm_stats_inc(set_switch_exp);
29166 +
29167 + ret = pfm_switch_sets(ctx, NULL, 1, 0);
29168 + set = ctx->active_set;
29169 + if (ret == HRTIMER_RESTART)
29170 + hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL);
29171 + }
29172 +}
29173 +
29174 +int __init pfm_init_sets(void)
29175 +{
29176 + pfm_set_cachep = kmem_cache_create("pfm_event_set",
29177 + sizeof(struct pfm_event_set),
29178 + SLAB_HWCACHE_ALIGN, 0, NULL);
29179 + if (!pfm_set_cachep) {
29180 + PFM_ERR("cannot initialize event set slab");
29181 + return -ENOMEM;
29182 + }
29183 + return 0;
29184 +}
29185 diff --git a/perfmon/perfmon_smpl.c b/perfmon/perfmon_smpl.c
29186 new file mode 100644
29187 index 0000000..e31fb15
29188 --- /dev/null
29189 +++ b/perfmon/perfmon_smpl.c
29190 @@ -0,0 +1,865 @@
29191 +/*
29192 + * perfmon_smpl.c: perfmon2 sampling management
29193 + *
29194 + * This file implements the perfmon2 interface which
29195 + * provides access to the hardware performance counters
29196 + * of the host processor.
29197 + *
29198 + *
29199 + * The initial version of perfmon.c was written by
29200 + * Ganesh Venkitachalam, IBM Corp.
29201 + *
29202 + * Then it was modified for perfmon-1.x by Stephane Eranian and
29203 + * David Mosberger, Hewlett Packard Co.
29204 + *
29205 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
29206 + * by Stephane Eranian, Hewlett Packard Co.
29207 + *
29208 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
29209 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
29210 + * David Mosberger-Tang <davidm@hpl.hp.com>
29211 + *
29212 + * More information about perfmon available at:
29213 + * http://perfmon2.sf.net
29214 + *
29215 + * This program is free software; you can redistribute it and/or
29216 + * modify it under the terms of version 2 of the GNU General Public
29217 + * License as published by the Free Software Foundation.
29218 + *
29219 + * This program is distributed in the hope that it will be useful,
29220 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
29221 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29222 + * General Public License for more details.
29223 + *
29224 + * You should have received a copy of the GNU General Public License
29225 + * along with this program; if not, write to the Free Software
29226 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29227 + * 02111-1307 USA
29228 + */
29229 +#include <linux/module.h>
29230 +#include <linux/kernel.h>
29231 +#include <linux/vmalloc.h>
29232 +#include <linux/fs.h>
29233 +#include <linux/mm.h>
29234 +#include <linux/random.h>
29235 +#include <linux/uaccess.h>
29236 +#include <linux/perfmon_kern.h>
29237 +
29238 +#include "perfmon_priv.h"
29239 +
29240 +/**
29241 + * pfm_smpl_buf_alloc - allocate memory for sampling buffer
29242 + * @ctx: context to operate on
29243 + * @rsize: requested size
29244 + *
29245 + * called from pfm_smpl_buffer_alloc_old() (IA64-COMPAT)
29246 + * and pfm_setup_smpl_fmt()
29247 + *
29248 + * interrupts are enabled, context is not locked.
29249 + *
29250 + * function is not static because it is called from the IA-64
29251 + * compatibility module (perfmon_compat.c)
29252 + */
29253 +int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize)
29254 +{
29255 +#if PFM_ARCH_SMPL_ALIGN_SIZE > 0
29256 +#define PFM_ALIGN_SMPL(a, f) (void *)((((unsigned long)(a))+((f)-1)) & ~((f)-1))
29257 +#else
29258 +#define PFM_ALIGN_SMPL(a, f) (a)
29259 +#endif
29260 + void *addr, *real_addr;
29261 + size_t size, real_size;
29262 + int ret;
29263 +
29264 + might_sleep();
29265 +
29266 + /*
29267 + * align page boundary
29268 + */
29269 + size = PAGE_ALIGN(rsize);
29270 +
29271 + /*
29272 + * On some arch, it may be necessary to get an alignment greater
29273 + * than page size to avoid certain cache effects (e.g., MIPS).
29274 + * This is the reason for PFM_ARCH_SMPL_ALIGN_SIZE.
29275 + */
29276 + real_size = size + PFM_ARCH_SMPL_ALIGN_SIZE;
29277 +
29278 + PFM_DBG("req_size=%zu size=%zu real_size=%zu",
29279 + rsize,
29280 + size,
29281 + real_size);
29282 +
29283 + ret = pfm_smpl_buf_space_acquire(ctx, real_size);
29284 + if (ret)
29285 + return ret;
29286 +
29287 + /*
29288 + * vmalloc can sleep. we do not hold
29289 + * any spinlock and interrupts are enabled
29290 + */
29291 + real_addr = addr = vmalloc(real_size);
29292 + if (!real_addr) {
29293 + PFM_DBG("cannot allocate sampling buffer");
29294 + goto unres;
29295 + }
29296 +
29297 + /*
29298 + * align the useable sampling buffer address to the arch requirement
29299 + * This is a nop on most architectures
29300 + */
29301 + addr = PFM_ALIGN_SMPL(real_addr, PFM_ARCH_SMPL_ALIGN_SIZE);
29302 + /* addr may sit above real_addr: only size bytes are sure to follow it */
29303 + memset(addr, 0, size);
29304 +
29305 + /*
29306 + * due to cache aliasing, it may be necessary to flush the pages
29307 + * on certain architectures (e.g., MIPS)
29308 + */
29309 + pfm_cacheflush(addr, size);
29310 +
29311 + /*
29312 + * what needs to be freed
29313 + */
29314 + ctx->smpl_real_addr = real_addr;
29315 + ctx->smpl_real_size = real_size;
29316 +
29317 + /*
29318 + * what is actually available to user
29319 + */
29320 + ctx->smpl_addr = addr;
29321 + ctx->smpl_size = size;
29322 +
29323 + PFM_DBG("addr=%p real_addr=%p", addr, real_addr);
29324 +
29325 + return 0;
29326 +unres:
29327 + /*
29328 + * smpl_addr is NULL, no double freeing possible in pfm_context_free()
29329 + */
29330 + pfm_smpl_buf_space_release(ctx, real_size);
29331 +
29332 + return -ENOMEM;
29333 +}
29334 +
29335 +/**
29336 + * pfm_smpl_buf_free - free resources associated with sampling
29337 + * @ctx: context to operate on
29338 + */
29339 +void pfm_smpl_buf_free(struct pfm_context *ctx)
29340 +{
29341 + struct pfm_smpl_fmt *fmt;
29342 +
29343 + fmt = ctx->smpl_fmt;
29344 +
29345 + /*
29346 + * some formats may not use a buffer, yet they may
29347 + * need to be called on exit
29348 + */
29349 + if (fmt) {
29350 + if (fmt->fmt_exit)
29351 + (*fmt->fmt_exit)(ctx->smpl_addr);
29352 + /*
29353 + * decrease refcount of sampling format
29354 + */
29355 + pfm_smpl_fmt_put(fmt);
29356 + }
29357 +
29358 + if (ctx->smpl_addr) {
29359 + pfm_smpl_buf_space_release(ctx, ctx->smpl_real_size);
29360 +
29361 + PFM_DBG("free buffer real_addr=0x%p real_size=%zu",
29362 + ctx->smpl_real_addr,
29363 + ctx->smpl_real_size);
29364 +
29365 + vfree(ctx->smpl_real_addr);
29366 + }
29367 +}
29368 +
29369 +/**
29370 + * pfm_setup_smpl_fmt - initialization of sampling format and buffer
29371 + * @ctx: context to operate on
29372 + * @fmt_arg: sampling format arguments
29373 + * @ctx_flags: context flags as passed by user
29374 + * @filp: file descriptor associated with context
29375 + *
29376 + * called from __pfm_create_context()
29377 + */
29378 +int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg,
29379 + struct file *filp)
29380 +{
29381 + struct pfm_smpl_fmt *fmt;
29382 + size_t size = 0;
29383 + int ret = 0;
29384 +
29385 + fmt = ctx->smpl_fmt;
29386 +
29387 + /*
29388 + * validate parameters
29389 + */
29390 + if (fmt->fmt_validate) {
29391 + ret = (*fmt->fmt_validate)(ctx_flags,
29392 + ctx->regs.num_pmds,
29393 + fmt_arg);
29394 + PFM_DBG("validate(0x%x,%p)=%d", ctx_flags, fmt_arg, ret);
29395 + if (ret)
29396 + goto error;
29397 + }
29398 +
29399 + /*
29400 + * check if buffer format needs buffer allocation
29401 + */
29402 + size = 0;
29403 + if (fmt->fmt_getsize) {
29404 + ret = (*fmt->fmt_getsize)(ctx_flags, fmt_arg, &size);
29405 + if (ret) {
29406 + PFM_DBG("cannot get size ret=%d", ret);
29407 + goto error;
29408 + }
29409 + }
29410 +
29411 + /*
29412 + * allocate buffer
29413 + * v20_compat is for IA-64 backward compatibility with perfmon v2.0
29414 + */
29415 + if (size) {
29416 +#ifdef CONFIG_IA64_PERFMON_COMPAT
29417 + /*
29418 + * backward compatibility with perfmon v2.0 on Ia-64
29419 + */
29420 + if (ctx->flags.ia64_v20_compat)
29421 + ret = pfm_smpl_buf_alloc_compat(ctx, size, filp);
29422 + else
29423 +#endif
29424 + ret = pfm_smpl_buf_alloc(ctx, size);
29425 +
29426 + if (ret)
29427 + goto error;
29428 +
29429 + }
29430 +
29431 + if (fmt->fmt_init) {
29432 + ret = (*fmt->fmt_init)(ctx, ctx->smpl_addr, ctx_flags,
29433 + ctx->regs.num_pmds,
29434 + fmt_arg);
29435 + }
29436 + /*
29437 + * if there was an error, the buffer/resource will be freed by
29438 + * via pfm_context_free()
29439 + */
29440 +error:
29441 + return ret;
29442 +}
29443 +
29444 +void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
29445 +{
29446 + u64 now;
29447 +
29448 + now = sched_clock();
29449 +
29450 + /*
29451 + * we save the PMD values such that we can read them while
29452 + * MASKED without having the thread stopped
29453 + * because monitoring is stopped
29454 + *
29455 + * pfm_save_pmds() could be avoided if we knew
29456 + * that pfm_arch_intr_freeze() had saved them already
29457 + */
29458 + pfm_save_pmds(ctx, set);
29459 + pfm_arch_mask_monitoring(ctx, set);
29460 + /*
29461 + * accumulate the set duration up to this point
29462 + */
29463 + set->duration += now - set->duration_start;
29464 +
29465 + ctx->state = PFM_CTX_MASKED;
29466 +
29467 + /*
29468 + * need to stop timer and remember remaining time
29469 + * will be reloaded in pfm_unmask_monitoring
29470 + * hrtimer is cancelled in the tail of the interrupt
29471 + * handler once the context is unlocked
29472 + */
29473 + if (set->flags & PFM_SETFL_TIME_SWITCH) {
29474 + struct hrtimer *h = &__get_cpu_var(pfm_hrtimer);
29475 + hrtimer_cancel(h);
29476 + set->hrtimer_rem = hrtimer_get_remaining(h);
29477 + }
29478 + PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart);
29479 +}
29480 +
29481 +/**
29482 + * pfm_unmask_monitoring - unmask monitoring
29483 + * @ctx: context to work with
29484 + * @set: current active set
29485 + *
29486 + * interrupts are masked when entering this function.
29487 + * context must be in MASKED state when calling.
29488 + *
29489 + * Upon return, the active set may have changed when using timeout
29490 + * based switching.
29491 + */
29492 +static void pfm_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
29493 +{
29494 + if (ctx->state != PFM_CTX_MASKED)
29495 + return;
29496 +
29497 + PFM_DBG_ovfl("unmasking monitoring");
29498 +
29499 + /*
29500 + * must be done before calling
29501 + * pfm_arch_unmask_monitoring()
29502 + */
29503 + ctx->state = PFM_CTX_LOADED;
29504 +
29505 + /*
29506 + * we need to restore the PMDs because they
29507 + * may have been modified by user while MASKED in
29508 + * which case the actual registers have not yet
29509 + * been updated
29510 + */
29511 + pfm_arch_restore_pmds(ctx, set);
29512 +
29513 + /*
29514 + * call arch specific handler
29515 + */
29516 + pfm_arch_unmask_monitoring(ctx, set);
29517 +
29518 + /*
29519 + * clear force reload flag. May have been set
29520 + * in pfm_write_pmcs or pfm_write_pmds
29521 + */
29522 + set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
29523 +
29524 + /*
29525 + * reset set duration timer
29526 + */
29527 + set->duration_start = sched_clock();
29528 +
29529 + /*
29530 + * restart hrtimer if needed
29531 + */
29532 + if (set->flags & PFM_SETFL_TIME_SWITCH) {
29533 + pfm_restart_timer(ctx, set);
29534 + /* careful here as pfm_restart_timer may switch sets */
29535 + }
29536 +}
29537 +
29538 +void pfm_reset_pmds(struct pfm_context *ctx,
29539 + struct pfm_event_set *set,
29540 + int num_pmds,
29541 + int reset_mode)
29542 +{
29543 + u64 val, mask, new_seed;
29544 + struct pfm_pmd *reg;
29545 + unsigned int i, not_masked;
29546 +
29547 + not_masked = ctx->state != PFM_CTX_MASKED;
29548 +
29549 + PFM_DBG_ovfl("%s r_pmds=0x%llx not_masked=%d",
29550 + reset_mode == PFM_PMD_RESET_LONG ? "long" : "short",
29551 + (unsigned long long)set->reset_pmds[0],
29552 + not_masked);
29553 +
29554 + pfm_stats_inc(reset_pmds_count);
29555 +
29556 + for (i = 0; num_pmds; i++) {
29557 + if (test_bit(i, cast_ulp(set->reset_pmds))) {
29558 + num_pmds--;
29559 +
29560 + reg = set->pmds + i;
29561 +
29562 + val = reset_mode == PFM_PMD_RESET_LONG ?
29563 + reg->long_reset : reg->short_reset;
29564 +
29565 + if (reg->flags & PFM_REGFL_RANDOM) {
29566 + mask = reg->mask;
29567 + new_seed = random32();
29568 +
29569 + /* construct a full 64-bit random value: */
29570 + if ((unlikely(mask >> 32) != 0))
29571 + new_seed |= (u64)random32() << 32;
29572 +
29573 + /* counter values are negative numbers! */
29574 + val -= (new_seed & mask);
29575 + }
29576 +
29577 + set->pmds[i].value = val;
29578 + reg->lval = val;
29579 +
29580 + /*
29581 + * not all PMD to reset are necessarily
29582 + * counters
29583 + */
29584 + if (not_masked)
29585 + pfm_write_pmd(ctx, i, val);
29586 +
29587 + PFM_DBG_ovfl("set%u pmd%u sval=0x%llx",
29588 + set->id,
29589 + i,
29590 + (unsigned long long)val);
29591 + }
29592 + }
29593 +
29594 + /*
29595 + * done with reset
29596 + */
29597 + bitmap_zero(cast_ulp(set->reset_pmds), i);
29598 +
29599 + /*
29600 + * make changes visible
29601 + */
29602 + if (not_masked)
29603 + pfm_arch_serialize();
29604 +}
29605 +
29606 +/*
29607 + * called from pfm_handle_work() and __pfm_restart()
29608 + * for system-wide and per-thread context to resume
29609 + * monitoring after a user level notification.
29610 + *
29611 + * In both cases, the context is locked and interrupts
29612 + * are disabled.
29613 + */
29614 +void pfm_resume_after_ovfl(struct pfm_context *ctx)
29615 +{
29616 + struct pfm_smpl_fmt *fmt;
29617 + u32 rst_ctrl;
29618 + struct pfm_event_set *set;
29619 + u64 *reset_pmds;
29620 + void *hdr;
29621 + int state, ret;
29622 +
29623 + hdr = ctx->smpl_addr;
29624 + fmt = ctx->smpl_fmt;
29625 + state = ctx->state;
29626 + set = ctx->active_set;
29627 + ret = 0;
29628 +
29629 + if (hdr) {
29630 + rst_ctrl = 0;
29631 + prefetch(hdr);
29632 + } else {
29633 + rst_ctrl = PFM_OVFL_CTRL_RESET;
29634 + }
29635 +
29636 + /*
29637 + * if using a sampling buffer format and it has a restart callback,
29638 + * then invoke it. hdr may be NULL, if the format does not use a
29639 + * perfmon buffer
29640 + */
29641 + if (fmt && fmt->fmt_restart)
29642 + ret = (*fmt->fmt_restart)(state == PFM_CTX_LOADED, &rst_ctrl,
29643 + hdr);
29644 +
29645 + reset_pmds = set->reset_pmds;
29646 +
29647 + PFM_DBG("fmt_restart=%d reset_count=%d set=%u r_pmds=0x%llx switch=%d "
29648 + "ctx_state=%d",
29649 + ret,
29650 + ctx->flags.reset_count,
29651 + set->id,
29652 + (unsigned long long)reset_pmds[0],
29653 + (set->priv_flags & PFM_SETFL_PRIV_SWITCH),
29654 + state);
29655 +
29656 + if (!ret) {
29657 + /*
29658 + * switch set if needed
29659 + */
29660 + if (set->priv_flags & PFM_SETFL_PRIV_SWITCH) {
29661 + set->priv_flags &= ~PFM_SETFL_PRIV_SWITCH;
29662 + pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_LONG, 0);
29663 + set = ctx->active_set;
29664 + } else if (rst_ctrl & PFM_OVFL_CTRL_RESET) {
29665 + int nn;
29666 + nn = bitmap_weight(cast_ulp(set->reset_pmds),
29667 + ctx->regs.max_pmd);
29668 + if (nn)
29669 + pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_LONG);
29670 + }
29671 +
29672 + if (!(rst_ctrl & PFM_OVFL_CTRL_MASK))
29673 + pfm_unmask_monitoring(ctx, set);
29674 + else
29675 + PFM_DBG("stopping monitoring?");
29676 + ctx->state = PFM_CTX_LOADED;
29677 + }
29678 +}
29679 +
29680 +/*
29681 + * This function is called when we need to perform asynchronous
29682 + * work on a context. This function is called ONLY when about to
29683 + * return to user mode (very much like with signal handling).
29684 + *
29685 + * There are several reasons why we come here:
29686 + *
29687 + * - per-thread mode, not self-monitoring, to reset the counters
29688 + * after a pfm_restart()
29689 + *
29690 + * - we are zombie and we need to cleanup our state
29691 + *
29692 + * - we need to block after an overflow notification
29693 + * on a context with the PFM_OVFL_NOTIFY_BLOCK flag
29694 + *
29695 + * This function is never called for a system-wide context.
29696 + *
29697 + * pfm_handle_work() can be called with interrupts enabled
29698 + * (TIF_NEED_RESCHED) or disabled. The down_interruptible
29699 + * call may sleep, therefore we must re-enable interrupts
29700 + * to avoid deadlocks. It is safe to do so because this function
29701 + * is called ONLY when returning to user level, in which case
29702 + * there is no risk of kernel stack overflow due to deep
29703 + * interrupt nesting.
29704 + */
29705 +void pfm_handle_work(struct pt_regs *regs)
29706 +{
29707 + struct pfm_context *ctx;
29708 + unsigned long flags, dummy_flags;
29709 + int type, ret, info;
29710 +
29711 +#ifdef CONFIG_PPC
29712 + /*
29713 + * This is just a temporary fix. Obviously we'd like to fix the powerpc
29714 + * code to make that check before calling __pfm_handle_work() to
29715 + * prevent the function call overhead, but the call is made from
29716 + * assembly code, so it will take a little while to figure out how to
29717 + * perform the check correctly.
29718 + */
29719 + if (!test_thread_flag(TIF_PERFMON_WORK))
29720 + return;
29721 +#endif
29722 +
29723 + if (!user_mode(regs))
29724 + return;
29725 +
29726 + clear_thread_flag(TIF_PERFMON_WORK);
29727 +
29728 + pfm_stats_inc(handle_work_count);
29729 +
29730 + ctx = current->pfm_context;
29731 + if (ctx == NULL) {
29732 + PFM_DBG("[%d] has no ctx", current->pid);
29733 + return;
29734 + }
29735 +
29736 + BUG_ON(ctx->flags.system);
29737 +
29738 + spin_lock_irqsave(&ctx->lock, flags);
29739 +
29740 + type = ctx->flags.work_type;
29741 + ctx->flags.work_type = PFM_WORK_NONE;
29742 +
29743 + PFM_DBG("work_type=%d reset_count=%d",
29744 + type,
29745 + ctx->flags.reset_count);
29746 +
29747 + switch (type) {
29748 + case PFM_WORK_ZOMBIE:
29749 + goto do_zombie;
29750 + case PFM_WORK_RESET:
29751 + /* simply reset, no blocking */
29752 + goto skip_blocking;
29753 + case PFM_WORK_NONE:
29754 + PFM_DBG("unexpected PFM_WORK_NONE");
29755 + goto nothing_todo;
29756 + case PFM_WORK_BLOCK:
29757 + break;
29758 + default:
29759 + PFM_DBG("unkown type=%d", type);
29760 + goto nothing_todo;
29761 + }
29762 +
29763 + /*
29764 + * restore interrupt mask to what it was on entry.
29765 + * Could be enabled/disabled.
29766 + */
29767 + spin_unlock_irqrestore(&ctx->lock, flags);
29768 +
29769 + /*
29770 + * force interrupt enable because of down_interruptible()
29771 + */
29772 + local_irq_enable();
29773 +
29774 + PFM_DBG("before block sleeping");
29775 +
29776 + /*
29777 + * may go through without blocking on SMP systems
29778 + * if restart has been received already by the time we call down()
29779 + */
29780 + ret = wait_for_completion_interruptible(&ctx->restart_complete);
29781 +
29782 + PFM_DBG("after block sleeping ret=%d", ret);
29783 +
29784 + /*
29785 + * lock context and mask interrupts again
29786 + * We save flags into a dummy because we may have
29787 + * altered interrupts mask compared to entry in this
29788 + * function.
29789 + */
29790 + spin_lock_irqsave(&ctx->lock, dummy_flags);
29791 +
29792 + if (ctx->state == PFM_CTX_ZOMBIE)
29793 + goto do_zombie;
29794 +
29795 + /*
29796 + * in case of interruption of down() we don't restart anything
29797 + */
29798 + if (ret < 0)
29799 + goto nothing_todo;
29800 +
29801 +skip_blocking:
29802 + /*
29803 + * iterate over the number of pending resets
29804 + * There are certain situations where there may be
29805 + * multiple notifications sent before a pfm_restart().
29806 + * As such, it may be that multiple pfm_restart() are
29807 + * issued before the monitored thread gets to
29808 + * pfm_handle_work(). To avoid losing restarts, pfm_restart()
29809 + * increments a counter (reset_counts). Here, we take this
29810 + * into account by potentially calling pfm_resume_after_ovfl()
29811 + * multiple times. It is up to the sampling format to take the
29812 + * appropriate actions.
29813 + */
29814 + while (ctx->flags.reset_count) {
29815 + pfm_resume_after_ovfl(ctx);
29816 + /* careful as active set may have changed */
29817 + ctx->flags.reset_count--;
29818 + }
29819 +
29820 +nothing_todo:
29821 + /*
29822 + * restore flags as they were upon entry
29823 + */
29824 + spin_unlock_irqrestore(&ctx->lock, flags);
29825 + return;
29826 +
29827 +do_zombie:
29828 + PFM_DBG("context is zombie, bailing out");
29829 +
29830 + __pfm_unload_context(ctx, &info);
29831 +
29832 + /*
29833 + * keep the spinlock check happy
29834 + */
29835 + spin_unlock(&ctx->lock);
29836 +
29837 + /*
29838 + * enable interrupt for vfree()
29839 + */
29840 + local_irq_enable();
29841 +
29842 + /*
29843 + * cancel timer now that context is unlocked
29844 + */
29845 + if (info & 0x2) {
29846 + ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
29847 + PFM_DBG("timeout cancel=%d", ret);
29848 + }
29849 +
29850 + /*
29851 + * actual context free
29852 + */
29853 + pfm_free_context(ctx);
29854 +
29855 + /*
29856 + * restore interrupts as they were upon entry
29857 + */
29858 + local_irq_restore(flags);
29859 +
29860 + /* always true */
29861 + if (info & 0x1)
29862 + pfm_session_release(0, 0);
29863 +}
29864 +
29865 +/**
29866 + * __pfm_restart - resume monitoring after user-level notification
29867 + * @ctx: context to operate on
29868 + * @info: return information used to free resource once unlocked
29869 + *
29870 + * function called from sys_pfm_restart(). It is used when overflow
29871 + * notification is requested. For each notification received, the user
29872 + * must call pfm_restart() to indicate to the kernel that it is done
29873 + * processing the notification.
29874 + *
29875 + * When the caller is doing user level sampling, this function resets
29876 + * the overflowed counters and resumes monitoring which is normally stopped
29877 + * during notification (always the consequence of a counter overflow).
29878 + *
29879 + * When using a sampling format, the format restart() callback is invoked,
29880 + * overflowed PMDS may be reset based upon decision from sampling format.
29881 + *
29882 + * When operating in per-thread mode, and when not self-monitoring, the
29883 + * monitored thread DOES NOT need to be stopped, unlike for many other calls.
29884 + *
29885 + * This means that the effect of the restart may not necessarily be observed
29886 + * right when returning from the call. For instance, counters may not already
29887 + * be reset in the other thread.
29888 + *
29889 + * When operating in system-wide, the caller must be running on the monitored
29890 + * CPU.
29891 + *
29892 + * The context is locked and interrupts are disabled.
29893 + * Return: 0 on success, -EBUSY if the state or can_restart forbids restart.
29894 + * info value upon return:
29895 + * - bit 0: when set, caller must issue complete() on ctx->restart_complete
29896 + */
29897 +int __pfm_restart(struct pfm_context *ctx, int *info)
29898 +{
29899 + int state;
29900 +
29901 + state = ctx->state;
29902 +
29903 + PFM_DBG("state=%d can_restart=%d reset_count=%d",
29904 + state,
29905 + ctx->flags.can_restart,
29906 + ctx->flags.reset_count);
29907 +
29908 + *info = 0;
29909 +
29910 + switch (state) {
29911 + case PFM_CTX_MASKED:
29912 + break;
29913 + case PFM_CTX_LOADED:
29914 + if (ctx->smpl_addr && ctx->smpl_fmt->fmt_restart)
29915 + break; /* else fall through: LOADED without fmt_restart is rejected */
29916 + default:
29917 + PFM_DBG("invalid state=%d", state);
29918 + return -EBUSY;
29919 + }
29920 +
29921 + /*
29922 + * first check if allowed to restart, i.e., notifications received
29923 + */
29924 + if (!ctx->flags.can_restart) {
29925 + PFM_DBG("no restart can_restart=0");
29926 + return -EBUSY;
29927 + }
29928 +
29929 + pfm_stats_inc(pfm_restart_count);
29930 +
29931 + /*
29932 + * at this point, the context is either LOADED or MASKED
29933 + */
29934 + ctx->flags.can_restart--;
29935 +
29936 + /*
29937 + * handle self-monitoring case and system-wide
29938 + */
29939 + if (ctx->task == current || ctx->flags.system) {
29940 + pfm_resume_after_ovfl(ctx);
29941 + return 0;
29942 + }
29943 +
29944 + /*
29945 + * restart another task
29946 + */
29947 +
29948 + /*
29949 + * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
29950 + * the task is blocked or on its way to block. That's the normal
29951 + * restart path. If the monitoring is not masked, then the task
29952 + * can be actively monitoring and we cannot directly intervene.
29953 + * Therefore we use the trap mechanism to catch the task and
29954 + * force it to reset the buffer/reset PMDs.
29955 + *
29956 + * if non-blocking, then we ensure that the task will go into
29957 + * pfm_handle_work() before returning to user mode.
29958 + *
29959 + * We cannot explicitly reset another task, it MUST always
29960 + * be done by the task itself. This works for system wide because
29961 + * the tool that is controlling the session is logically doing
29962 + * "self-monitoring".
29963 + */
29964 + if (ctx->flags.block && state == PFM_CTX_MASKED) {
29965 + PFM_DBG("unblocking [%d]", ctx->task->pid);
29966 + /*
29967 + * It is not possible to call complete() with the context locked
29968 + * otherwise we have a potential deadlock with the PMU context
29969 + * switch code due to a lock inversion between task_rq_lock()
29970 + * and the context lock.
29971 + * Instead we mark whether or not we need to issue the complete
29972 + * and we invoke the function once the context lock is released
29973 + * in sys_pfm_restart()
29974 + */
29975 + *info = 1;
29976 + } else {
29977 + PFM_DBG("[%d] armed exit trap", ctx->task->pid);
29978 + pfm_post_work(ctx->task, ctx, PFM_WORK_RESET);
29979 + }
29980 + ctx->flags.reset_count++; /* reached only when restarting another task */
29981 + return 0;
29982 +}
29983 +
29984 +/**
29985 + * pfm_get_smpl_arg -- copy user arguments to pfm_create_context() related to sampling format
29986 + * @fmt_uname: format name as passed by user
29987 + * @fmt_uarg: format optional argument as passed by user
29988 + * @usize: size of structure passed in fmt_uarg
29989 + * @arg: kernel copy of fmt_uarg (kmalloc'ed, caller must kfree; NULL if no argument)
29990 + * @fmt: pointer to sampling format upon success (reference held)
29991 + *
29992 + * Return: 0 on success, negative errno on failure (getname() errors are propagated)
29993 + */
29994 +int pfm_get_smpl_arg(char __user *fmt_uname, void __user *fmt_uarg, size_t usize, void **arg,
29995 + struct pfm_smpl_fmt **fmt)
29996 +{
29997 + struct pfm_smpl_fmt *f;
29998 + char *fmt_name;
29999 + void *addr = NULL;
30000 + size_t sz;
30001 + int ret;
30002 +
30003 + fmt_name = getname(fmt_uname);
30004 + if (IS_ERR(fmt_name)) { /* getname() reports failure via ERR_PTR(), never NULL */
30005 + PFM_DBG("getname failed");
30006 + return PTR_ERR(fmt_name);
30007 + }
30008 +
30009 + /*
30010 + * find fmt and increase refcount
30011 + */
30012 + f = pfm_smpl_fmt_get(fmt_name);
30013 +
30014 + putname(fmt_name);
30015 +
30016 + if (f == NULL) {
30017 + PFM_DBG("buffer format not found");
30018 + return -EINVAL;
30019 + }
30020 +
30021 + /*
30022 + * expected format argument size
30023 + */
30024 + sz = f->fmt_arg_size;
30025 +
30026 + /*
30027 + * check user size matches expected size
30028 + * usize = -1 is for IA-64 backward compatibility
30029 + */
30030 + ret = -EINVAL;
30031 + if (sz != usize && usize != -1) {
30032 + PFM_DBG("invalid arg size %zu, format expects %zu",
30033 + usize, sz);
30034 + goto error;
30035 + }
30036 +
30037 + if (sz) {
30038 + ret = -ENOMEM;
30039 + addr = kmalloc(sz, GFP_KERNEL);
30040 + if (addr == NULL)
30041 + goto error;
30042 +
30043 + ret = -EFAULT;
30044 + if (copy_from_user(addr, fmt_uarg, sz))
30045 + goto error;
30046 + }
30047 + *arg = addr;
30048 + *fmt = f;
30049 + return 0;
30050 +
30051 +error:
30052 + kfree(addr); /* addr is still NULL if the allocation was never attempted */
30053 + pfm_smpl_fmt_put(f); /* drop the reference taken by pfm_smpl_fmt_get() */
30054 + return ret;
30055 +}
30056 diff --git a/perfmon/perfmon_syscalls.c b/perfmon/perfmon_syscalls.c
30057 new file mode 100644
30058 index 0000000..8777b58
30059 --- /dev/null
30060 +++ b/perfmon/perfmon_syscalls.c
30061 @@ -0,0 +1,1060 @@
30062 +/*
30063 + * perfmon_syscalls.c: perfmon2 system call interface
30064 + *
30065 + * This file implements the perfmon2 interface which
30066 + * provides access to the hardware performance counters
30067 + * of the host processor.
30068 + *
30069 + * The initial version of perfmon.c was written by
30070 + * Ganesh Venkitachalam, IBM Corp.
30071 + *
30072 + * Then it was modified for perfmon-1.x by Stephane Eranian and
30073 + * David Mosberger, Hewlett Packard Co.
30074 + *
30075 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
30076 + * by Stephane Eranian, Hewlett Packard Co.
30077 + *
30078 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
30079 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
30080 + * David Mosberger-Tang <davidm@hpl.hp.com>
30081 + *
30082 + * More information about perfmon available at:
30083 + * http://perfmon2.sf.net
30084 + *
30085 + * This program is free software; you can redistribute it and/or
30086 + * modify it under the terms of version 2 of the GNU General Public
30087 + * License as published by the Free Software Foundation.
30088 + *
30089 + * This program is distributed in the hope that it will be useful,
30090 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
30091 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30092 + * General Public License for more details.
30093 + *
30094 + * You should have received a copy of the GNU General Public License
30095 + * along with this program; if not, write to the Free Software
30096 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30097 + * 02111-1307 USA
30098 + */
30099 +#include <linux/kernel.h>
30100 +#include <linux/fs.h>
30101 +#include <linux/ptrace.h>
30102 +#include <linux/perfmon_kern.h>
30103 +#include <linux/uaccess.h>
30104 +#include "perfmon_priv.h"
30105 +
30106 +/*
30107 + * Context locking rules:
30108 + * ---------------------
30109 + * - any thread with access to the file descriptor of a context can
30110 + * potentially issue perfmon calls
30111 + *
30112 + * - calls must be serialized to guarantee correctness
30113 + *
30114 + * - as soon as a context is attached to a thread or CPU, it may be
30115 + * actively monitoring. On some architectures, such as IA-64, this
30116 + * is true even though the pfm_start() call has not been made. This
30117 + * comes from the fact that on some architectures, it is possible to
30118 + * start/stop monitoring from userland.
30119 + *
30120 + * - If monitoring is active, then there can be PMU interrupts. Because
30121 + * context accesses must be serialized, the perfmon system calls
30122 + * must mask interrupts as soon as the context is attached.
30123 + *
30124 + * - perfmon system calls that operate with the context unloaded cannot
30125 + * assume it is actually unloaded when they are called. They first need
30126 + * to check and for that they need interrupts masked. Then, if the
30127 + * context is actually unloaded, they can unmask interrupts.
30128 + *
30129 + * - interrupt masking holds true for other internal perfmon functions as
30130 + * well. Except for PMU interrupt handler because those interrupts
30131 + * cannot be nested.
30132 + *
30133 + * - we mask ALL interrupts instead of just the PMU interrupt because we
30134 + * also need to protect against timer interrupts which could trigger
30135 + * a set switch.
30136 + */
30137 +#ifdef CONFIG_UTRACE
30138 +#include <linux/utrace.h>
30139 +
30140 +static u32
30141 +stopper_quiesce(struct utrace_attached_engine *engine, struct task_struct *tsk)
30142 +{
30143 + PFM_DBG("quiesced [%d]", tsk->pid);
30144 + complete(engine->data); /* presumably the completion given to utrace_attach() -- confirm */
30145 + return UTRACE_ACTION_RESUME;
30146 +}
30147 +
30148 +void
30149 +pfm_resume_task(struct task_struct *t, void *data)
30150 +{
30151 + PFM_DBG("utrace detach [%d]", t->pid);
30152 + (void) utrace_detach(t, data); /* data: engine stored by pfm_wait_task_stopped() */
30153 +}
30154 +
30155 +static const struct utrace_engine_ops utrace_ops =
30156 +{
30157 + .report_quiesce = stopper_quiesce, /* only callback used: signals the waiter */
30158 +};
30159 +
30160 +static int pfm_wait_task_stopped(struct task_struct *task, void **data)
30161 +{
30162 + DECLARE_COMPLETION_ONSTACK(done);
30163 + struct utrace_attached_engine *eng;
30164 + int ret;
30165 +
30166 + eng = utrace_attach(task, UTRACE_ATTACH_CREATE, &utrace_ops, &done);
30167 + if (IS_ERR(eng))
30168 + return PTR_ERR(eng);
30169 + /* ask the task to quiesce; stopper_quiesce() then completes 'done' */
30170 + ret = utrace_set_flags(task, eng,
30171 + UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE));
30172 + PFM_DBG("wait quiesce [%d]", task->pid);
30173 + if (!ret)
30174 + ret = wait_for_completion_interruptible(&done);
30175 + /* on failure the caller gets no engine to detach, so detach here */
30176 + if (ret)
30177 + (void) utrace_detach(task, eng);
30178 + else
30179 + *data = eng;
30180 + return ret; /* propagate failure: the task is NOT stopped in that case */
30181 +}
30182 +#else /* !CONFIG_UTRACE */
30183 +static int pfm_wait_task_stopped(struct task_struct *task, void **data)
30184 +{
30185 + int ret;
30186 +
30187 + *data = NULL; /* nothing to hand to pfm_resume_task() in this variant */
30188 +
30189 + /*
30190 + * ptrace_may_access() returns 0 if the caller cannot attach
30191 + */
30192 + ret = ptrace_may_access(task, PTRACE_MODE_ATTACH);
30193 + PFM_DBG("may_attach=%d", ret);
30194 + if (!ret)
30195 + return -EPERM;
30196 + /* presumably 0 only if task is ptraced by the caller and stopped -- confirm */
30197 + ret = ptrace_check_attach(task, 0);
30198 + PFM_DBG("check_attach=%d", ret);
30199 + return ret;
30200 +}
30201 +void pfm_resume_task(struct task_struct *t, void *data)
30202 +{} /* no-op when CONFIG_UTRACE is not set */
30203 +#endif
30204 +
30205 +struct pfm_syscall_cookie {
30206 + struct file *filp; /* file returned by fget_light() */
30207 + int fput_needed; /* flag from fget_light(), passed back to fput_light() */
30208 +};
30209 +
30210 +/*
30211 + * cannot attach if :
30212 + * - kernel task
30213 + * - task not owned by caller (not checked here, see pfm_wait_task_stopped())
30214 + * - task is dead or zombie
30215 + * - cannot use blocking notification when self-monitoring
30216 + */
30217 +static int pfm_task_incompatible(struct pfm_context *ctx,
30218 + struct task_struct *task)
30219 +{
30220 + /*
30221 + * cannot attach to a kernel thread
30222 + */
30223 + if (!task->mm) {
30224 + PFM_DBG("cannot attach to kernel thread [%d]", task->pid);
30225 + return -EPERM;
30226 + }
30227 +
30228 + /*
30229 + * cannot use block on notification when
30230 + * self-monitoring.
30231 + */
30232 + if (ctx->flags.block && task == current) {
30233 + PFM_DBG("cannot use block on notification when self-monitoring"
30234 + "[%d]", task->pid);
30235 + return -EINVAL;
30236 + }
30237 + /*
30238 + * cannot attach to a zombie or dead task
30239 + */
30240 + if (task->exit_state == EXIT_ZOMBIE || task->exit_state == EXIT_DEAD) {
30241 + PFM_DBG("cannot attach to zombie/dead task [%d]", task->pid);
30242 + return -EBUSY;
30243 + }
30244 + return 0; /* task is compatible */
30245 +}
30246 +
30247 +/**
30248 + * pfm_get_task -- check permission and acquire task to monitor
30249 + * @ctx: perfmon context
30250 + * @pid: identification of the task to check
30251 + * @task: upon return, a pointer to the task to monitor
30252 + * @data: upon return, opaque cookie to pass to pfm_resume_task()
30253 + *
30254 + * Used in per-thread mode only AND when not self-monitoring. It finds
30255 + * the task to monitor and checks that the caller has permissions to
30256 + * attach. It also checks that the task is stopped via ptrace so that
30257 + * we can safely modify its state.
30258 + *
30259 + * task refcount is incremented when successful.
30260 + */
30261 +static int pfm_get_task(struct pfm_context *ctx, pid_t pid,
30262 + struct task_struct **task, void **data)
30263 +{
30264 + struct task_struct *p;
30265 + int ret = 0, ret1 = 0;
30266 +
30267 + *data = NULL;
30268 +
30269 + /*
30270 + * When attaching to another thread we must ensure
30271 + * that the thread is actually stopped.
30272 + *
30273 + * As a consequence, only the ptracing parent can actually
30274 + * attach a context to a thread. Obviously, this constraint
30275 + * does not exist for self-monitoring threads.
30276 + *
30277 + * Permission is checked in pfm_wait_task_stopped() below.
30278 + */
30279 + read_lock(&tasklist_lock);
30280 +
30281 + p = find_task_by_vpid(pid);
30282 + if (p)
30283 + get_task_struct(p); /* reference taken while tasklist_lock is held */
30284 +
30285 + read_unlock(&tasklist_lock);
30286 +
30287 + if (!p) {
30288 + PFM_DBG("task not found %d", pid);
30289 + return -ESRCH;
30290 + }
30291 +
30292 + ret = pfm_task_incompatible(ctx, p);
30293 + if (ret)
30294 + goto error;
30295 +
30296 + ret = pfm_wait_task_stopped(p, data);
30297 + if (ret)
30298 + goto error;
30299 +
30300 + *task = p;
30301 +
30302 + return 0;
30303 +error:
30304 + if (!(ret1 || ret)) /* defensive: ret is non-zero on every path reaching here */
30305 + ret = -EPERM;
30306 +
30307 + put_task_struct(p); /* undo get_task_struct() above */
30308 +
30309 + return ret;
30310 +}
30311 +
30312 +/*
30313 + * context must be locked by caller; *flags is refreshed if the lock is retaken
30314 + */
30315 +int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
30316 + unsigned long *flags, void **resume)
30317 +{
30318 + struct task_struct *task;
30319 + unsigned long local_flags, new_flags;
30320 + int state, ret;
30321 +
30322 + *resume = NULL; /* callers test this to decide on pfm_resume_task() */
30323 +
30324 +recheck:
30325 + /*
30326 + * task is NULL for system-wide context
30327 + */
30328 + task = ctx->task;
30329 + state = ctx->state;
30330 + local_flags = *flags;
30331 +
30332 + PFM_DBG("state=%d check_mask=0x%x", state, check_mask);
30333 + /*
30334 + * if the context is detached, then we do not touch
30335 + * hardware, therefore there is no restriction on when we can
30336 + * access it.
30337 + */
30338 + if (state == PFM_CTX_UNLOADED)
30339 + return 0;
30340 + /*
30341 + * no command can operate on a zombie context.
30342 + * A context becomes zombie when the file that identifies
30343 + * it is closed while the context is still attached to the
30344 + * thread it monitors.
30345 + */
30346 + if (state == PFM_CTX_ZOMBIE)
30347 + return -EINVAL;
30348 +
30349 + /*
30350 + * at this point, state is PFM_CTX_LOADED or PFM_CTX_MASKED
30351 + */
30352 +
30353 + /*
30354 + * some commands require the context to be unloaded to operate
30355 + */
30356 + if (check_mask & PFM_CMD_UNLOADED) {
30357 + PFM_DBG("state=%d, cmd needs context unloaded", state);
30358 + return -EBUSY;
30359 + }
30360 +
30361 + /*
30362 + * self-monitoring always ok.
30363 + */
30364 + if (task == current)
30365 + return 0;
30366 +
30367 + /*
30368 + * for syswide, the calling thread must be running on the cpu
30369 + * the context is bound to.
30370 + */
30371 + if (ctx->flags.system) {
30372 + if (ctx->cpu != smp_processor_id())
30373 + return -EBUSY;
30374 + return 0;
30375 + }
30376 +
30377 + /*
30378 + * at this point, monitoring another thread
30379 + */
30380 +
30381 + /*
30382 + * masked context: ok as is, unless unloading (then task must be stopped)
30383 + */
30384 + if (state == PFM_CTX_MASKED && !(check_mask & PFM_CMD_UNLOAD))
30385 + return 0;
30386 +
30387 + /*
30388 + * When we operate on another thread, we must wait for it to be
30389 + * stopped and completely off any CPU as we need to access the
30390 + * PMU state (or machine state).
30391 + *
30392 + * A thread can be put in the STOPPED state in various ways
30393 + * including PTRACE_ATTACH, or when it receives a SIGSTOP signal.
30394 + * We enforce that the thread must be ptraced, so it is stopped
30395 + * AND it CANNOT wake up while we operate on it because this
30396 + * would require an action from the ptracing parent which is the
30397 + * thread that is calling this function.
30398 + *
30399 + * The dependency on ptrace, imposes that only the ptracing
30400 + * parent can issue command on a thread. This is unfortunate
30401 + * but we do not know of a better way of doing this.
30402 + */
30403 + if (check_mask & PFM_CMD_STOPPED) {
30404 + /* the wait below is done with the context lock dropped */
30405 + spin_unlock_irqrestore(&ctx->lock, local_flags);
30406 +
30407 + /*
30408 + * check that the thread is ptraced AND STOPPED
30409 + */
30410 + ret = pfm_wait_task_stopped(task, resume);
30411 +
30412 + spin_lock_irqsave(&ctx->lock, new_flags);
30413 +
30414 + /*
30415 + * flags may be different than when we released the lock
30416 + */
30417 + *flags = new_flags;
30418 +
30419 + if (ret)
30420 + return ret;
30421 + /*
30422 + * we must recheck to verify if state has changed
30423 + */
30424 + if (unlikely(ctx->state != state)) {
30425 + PFM_DBG("old_state=%d new_state=%d",
30426 + state,
30427 + ctx->state);
30428 + goto recheck; /* NOTE(review): *resume from this pass may be overwritten */
30429 + }
30430 + }
30431 + return 0;
30432 +}
30433 +
30434 +/*
30435 + * pfm_get_args - Function used to copy the syscall argument into kernel memory.
30436 + * @ureq: user argument
30437 + * @sz: user argument size
30438 + * @lsz: size of stack buffer
30439 + * @laddr: stack buffer address
30440 + * @req: point to start of kernel copy of the argument
30441 + * @ptr_free: address of kernel copy to free
30442 + *
30443 + * There are two options:
30444 + * - use a stack buffer described by laddr (addresses) and lsz (size)
30445 + * - allocate memory
30446 + *
30447 + * return:
30448 + * < 0 : in case of error (caller must not use or free *ptr_free)
30449 + * 0 : success
30450 + * - req: points to base of kernel copy of arguments
30451 + * - ptr_free: address of buffer to free by caller on exit.
30452 + * NULL if using the stack buffer
30453 + *
30454 + * when ptr_free is not NULL upon successful return, the caller must kfree()
30455 + */
30456 +int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
30457 + void **req, void **ptr_free)
30458 +{
30459 + void *addr;
30460 +
30461 + /*
30462 + * check sysadmin argument limit
30463 + */
30464 + if (unlikely(sz > pfm_controls.arg_mem_max)) {
30465 + PFM_DBG("argument too big %zu max=%zu",
30466 + sz,
30467 + pfm_controls.arg_mem_max);
30468 + return -E2BIG;
30469 + }
30470 +
30471 + /*
30472 + * check if vector fits on stack buffer
30473 + */
30474 + if (sz > lsz) {
30475 + addr = kmalloc(sz, GFP_KERNEL);
30476 + if (unlikely(addr == NULL))
30477 + return -ENOMEM;
30478 + *ptr_free = addr;
30479 + } else {
30480 + addr = laddr;
30481 + *req = laddr; /* redundant: *req is assigned again on success below */
30482 + *ptr_free = NULL;
30483 + }
30484 +
30485 + /*
30486 + * bring the data in
30487 + */
30488 + if (unlikely(copy_from_user(addr, ureq, sz))) {
30489 + if (addr != laddr)
30490 + kfree(addr); /* *ptr_free now dangles; callers skip their kfree on error */
30491 + return -EFAULT;
30492 + }
30493 +
30494 + /*
30495 + * base address of kernel buffer
30496 + */
30497 + *req = addr;
30498 +
30499 + return 0;
30500 +}
30501 +
30502 +/**
30503 + * pfm_acquire_ctx_from_fd -- get ctx from file descriptor
30504 + * @fd: file descriptor
30505 + * @ctx: pointer to pointer of context updated on return
30506 + * @cookie: opaque structure to use for release
30507 + *
30508 + * Extracts the ctx from the file descriptor and takes a reference on the
30509 + * file (fget_light). On success (return 0) the cookie records what
30510 + * pfm_release_ctx_from_fd() needs to drop it. On failure -EBADF is
30511 + * returned, no file reference is held and the cookie is left untouched.
30512 + */
30513 +static int pfm_acquire_ctx_from_fd(int fd, struct pfm_context **ctx,
30514 + struct pfm_syscall_cookie *cookie)
30515 +{
30516 + struct file *filp;
30517 + int fput_needed;
30518 +
30519 + filp = fget_light(fd, &fput_needed);
30520 + if (unlikely(filp == NULL)) {
30521 + PFM_DBG("invalid fd %d", fd);
30522 + return -EBADF;
30523 + }
30524 +
30525 + *ctx = filp->private_data;
30526 + if (unlikely(!*ctx || filp->f_op != &pfm_file_ops)) {
30527 + PFM_DBG("fd %d not related to perfmon", fd);
30528 + fput_light(filp, fput_needed); /* do not leak the file reference */
30529 + return -EBADF;
30530 + }
30531 + cookie->filp = filp;
30532 + cookie->fput_needed = fput_needed;
30533 +
30534 + return 0;
30535 +}
30536 +
30537 +/**
30538 + * pfm_release_ctx_from_fd -- decrease refcount of file associated with context
30539 + * @cookie: the cookie structure filled in by a successful pfm_acquire_ctx_from_fd
30540 + */
30541 +static inline void pfm_release_ctx_from_fd(struct pfm_syscall_cookie *cookie)
30542 +{
30543 + fput_light(cookie->filp, cookie->fput_needed);
30544 +}
30545 +
30546 +/*
30547 + * unlike the other perfmon system calls, this one returns a file descriptor
30548 + * or a value < 0 in case of error, very much like open() or socket()
30549 + */
30550 +asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq,
30551 + char __user *fmt_name,
30552 + void __user *fmt_uarg, size_t fmt_size)
30553 +{
30554 + struct pfarg_ctx req;
30555 + struct pfm_smpl_fmt *fmt = NULL;
30556 + void *fmt_arg = NULL;
30557 + int ret;
30558 +
30559 + PFM_DBG("req=%p fmt=%p fmt_arg=%p size=%zu",
30560 + ureq, fmt_name, fmt_uarg, fmt_size);
30561 +
30562 + if (perfmon_disabled)
30563 + return -ENOSYS;
30564 +
30565 + if (copy_from_user(&req, ureq, sizeof(req)))
30566 + return -EFAULT;
30567 +
30568 + if (fmt_name) { /* sampling format is optional */
30569 + ret = pfm_get_smpl_arg(fmt_name, fmt_uarg, fmt_size, &fmt_arg, &fmt);
30570 + if (ret)
30571 + goto abort;
30572 + }
30573 + /* NOTE(review): fmt reference presumably owned by __pfm_create_context() -- confirm */
30574 + ret = __pfm_create_context(&req, fmt, fmt_arg, PFM_NORMAL, NULL);
30575 +
30576 + kfree(fmt_arg); /* kernel copy from pfm_get_smpl_arg(); NULL when no format */
30577 +abort:
30578 + return ret;
30579 +}
30580 +
30581 +asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, int count)
30582 +{
30583 + struct pfm_context *ctx;
30584 + struct task_struct *task;
30585 + struct pfm_syscall_cookie cookie;
30586 + struct pfarg_pmc pmcs[PFM_PMC_STK_ARG]; /* stack buffer for small requests */
30587 + struct pfarg_pmc *req;
30588 + void *fptr, *resume;
30589 + unsigned long flags;
30590 + size_t sz;
30591 + int ret;
30592 +
30593 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30594 +
30595 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
30596 + PFM_DBG("invalid arg count %d", count);
30597 + return -EINVAL;
30598 + }
30599 +
30600 + sz = count*sizeof(*ureq);
30601 +
30602 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30603 + if (ret)
30604 + return ret;
30605 +
30606 + ret = pfm_get_args(ureq, sz, sizeof(pmcs), pmcs, (void **)&req, &fptr);
30607 + if (ret)
30608 + goto error; /* pfm_get_args() already freed any buffer it allocated */
30609 +
30610 + spin_lock_irqsave(&ctx->lock, flags);
30611 +
30612 + task = ctx->task; /* sampled under the lock for pfm_resume_task() below */
30613 +
30614 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30615 + if (!ret)
30616 + ret = __pfm_write_pmcs(ctx, req, count);
30617 +
30618 + spin_unlock_irqrestore(&ctx->lock, flags);
30619 +
30620 + if (resume)
30621 + pfm_resume_task(task, resume);
30622 +
30623 + /*
30624 + * This function may be on the critical path.
30625 + * We want to avoid the branch if unnecessary.
30626 + */
30627 + if (fptr)
30628 + kfree(fptr);
30629 +error:
30630 + pfm_release_ctx_from_fd(&cookie);
30631 + return ret;
30632 +}
30633 +
30634 +asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, int count)
30635 +{
30636 + struct pfm_context *ctx;
30637 + struct task_struct *task;
30638 + struct pfm_syscall_cookie cookie;
30639 + struct pfarg_pmd pmds[PFM_PMD_STK_ARG]; /* stack buffer for small requests */
30640 + struct pfarg_pmd *req;
30641 + void *fptr, *resume;
30642 + unsigned long flags;
30643 + size_t sz;
30644 + int ret;
30645 +
30646 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30647 +
30648 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
30649 + PFM_DBG("invalid arg count %d", count);
30650 + return -EINVAL;
30651 + }
30652 +
30653 + sz = count*sizeof(*ureq);
30654 +
30655 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30656 + if (ret)
30657 + return ret;
30658 +
30659 + ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr);
30660 + if (ret)
30661 + goto error; /* pfm_get_args() already freed any buffer it allocated */
30662 +
30663 + spin_lock_irqsave(&ctx->lock, flags);
30664 +
30665 + task = ctx->task; /* sampled under the lock for pfm_resume_task() below */
30666 +
30667 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30668 + if (!ret)
30669 + ret = __pfm_write_pmds(ctx, req, count, 0);
30670 +
30671 + spin_unlock_irqrestore(&ctx->lock, flags);
30672 +
30673 + if (resume)
30674 + pfm_resume_task(task, resume);
30675 +
30676 + if (fptr) /* non-NULL only when pfm_get_args() had to kmalloc */
30677 + kfree(fptr);
30678 +error:
30679 + pfm_release_ctx_from_fd(&cookie);
30680 + return ret;
30681 +}
30682 +
30683 +asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, int count)
30684 +{
30685 + struct pfm_context *ctx;
30686 + struct task_struct *task;
30687 + struct pfm_syscall_cookie cookie;
30688 + struct pfarg_pmd pmds[PFM_PMD_STK_ARG]; /* stack buffer for small requests */
30689 + struct pfarg_pmd *req;
30690 + void *fptr, *resume;
30691 + unsigned long flags;
30692 + size_t sz;
30693 + int ret;
30694 +
30695 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30696 +
30697 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
30698 + return -EINVAL;
30699 +
30700 + sz = count*sizeof(*ureq);
30701 +
30702 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30703 + if (ret)
30704 + return ret;
30705 +
30706 + ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr);
30707 + if (ret)
30708 + goto error; /* pfm_get_args() already freed any buffer it allocated */
30709 +
30710 + spin_lock_irqsave(&ctx->lock, flags);
30711 +
30712 + task = ctx->task; /* sampled under the lock for pfm_resume_task() below */
30713 +
30714 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30715 + if (!ret)
30716 + ret = __pfm_read_pmds(ctx, req, count);
30717 +
30718 + spin_unlock_irqrestore(&ctx->lock, flags);
30719 + /* the request vector is copied back even when ret != 0 */
30720 + if (copy_to_user(ureq, req, sz))
30721 + ret = -EFAULT;
30722 +
30723 + if (resume)
30724 + pfm_resume_task(task, resume);
30725 +
30726 + if (fptr) /* non-NULL only when pfm_get_args() had to kmalloc */
30727 + kfree(fptr);
30728 +error:
30729 + pfm_release_ctx_from_fd(&cookie);
30730 + return ret;
30731 +}
30732 +
30733 +asmlinkage long sys_pfm_restart(int fd)
30734 +{
30735 + struct pfm_context *ctx;
30736 + struct task_struct *task;
30737 + struct pfm_syscall_cookie cookie;
30738 + void *resume;
30739 + unsigned long flags;
30740 + int ret, info; /* info is only valid when __pfm_restart() ran */
30741 +
30742 + PFM_DBG("fd=%d", fd);
30743 +
30744 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30745 + if (ret)
30746 + return ret;
30747 +
30748 + spin_lock_irqsave(&ctx->lock, flags);
30749 +
30750 + task = ctx->task;
30751 + /* mask 0: the monitored task does not have to be stopped for a restart */
30752 + ret = pfm_check_task_state(ctx, 0, &flags, &resume);
30753 + if (!ret)
30754 + ret = __pfm_restart(ctx, &info);
30755 +
30756 + spin_unlock_irqrestore(&ctx->lock, flags);
30757 +
30758 + if (resume)
30759 + pfm_resume_task(task, resume);
30760 + /*
30761 + * In per-thread mode with blocking notification, i.e.
30762 + * ctx->flags.block=1, we need to defer issuing the
30763 + * complete to unblock the blocked monitored thread.
30764 + * Otherwise we have a potential deadlock due to a lock
30765 + * inversion between the context lock and the task_rq_lock()
30766 + * which can happen if one thread is in this call and the other
30767 + * (the monitored thread) is in the context switch code.
30768 + *
30769 + * It is safe to access the context outside the critical section
30770 + * because:
30771 + * - we are protected by the fget_light(), thus the context
30772 + * cannot disappear
30773 + */
30774 + if (ret == 0 && info == 1) /* ret == 0 guarantees info was written */
30775 + complete(&ctx->restart_complete);
30776 +
30777 + pfm_release_ctx_from_fd(&cookie);
30778 + return ret;
30779 +}
30780 +
30781 +asmlinkage long sys_pfm_stop(int fd)
30782 +{
30783 + struct pfm_context *ctx;
30784 + struct task_struct *task;
30785 + struct pfm_syscall_cookie cookie;
30786 + void *resume;
30787 + unsigned long flags;
30788 + int ret;
30789 + int release_info = 0; /* must be defined even if __pfm_stop() is not reached */
30790 +
30791 + PFM_DBG("fd=%d", fd);
30792 +
30793 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30794 + if (ret)
30795 + return ret;
30796 +
30797 + spin_lock_irqsave(&ctx->lock, flags);
30798 +
30799 + task = ctx->task; /* sampled under the lock for pfm_resume_task() below */
30800 +
30801 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30802 + if (!ret)
30803 + ret = __pfm_stop(ctx, &release_info);
30804 +
30805 + spin_unlock_irqrestore(&ctx->lock, flags);
30806 +
30807 + if (resume)
30808 + pfm_resume_task(task, resume);
30809 +
30810 + /*
30811 + * defer cancellation of timer to avoid race
30812 + * with pfm_handle_switch_timeout()
30813 + *
30814 + * applies only when self-monitoring
30815 + */
30816 + if (release_info & 0x2)
30817 + hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
30818 +
30819 + pfm_release_ctx_from_fd(&cookie);
30820 + return ret;
30821 +}
30822 +
30823 +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq)
30824 +{
30825 + struct pfm_context *ctx;
30826 + struct task_struct *task;
30827 + struct pfm_syscall_cookie cookie;
30828 + void *resume;
30829 + struct pfarg_start req;
30830 + unsigned long flags;
30831 + int ret;
30832 +
30833 + PFM_DBG("fd=%d req=%p", fd, ureq);
30834 +
30835 + /*
30836 + * optional argument, copied before the fd lookup so -EFAULT leaks no file ref
30837 + */
30838 + if (ureq && copy_from_user(&req, ureq, sizeof(req)))
30839 + return -EFAULT;
30840 +
30841 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30842 + if (ret)
30843 + return ret;
30844 +
30845 + spin_lock_irqsave(&ctx->lock, flags);
30846 +
30847 + task = ctx->task; /* sampled under the lock for pfm_resume_task() below */
30848 +
30849 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
30850 + if (!ret)
30851 + ret = __pfm_start(ctx, ureq ? &req : NULL);
30852 +
30853 + spin_unlock_irqrestore(&ctx->lock, flags);
30854 +
30855 + if (resume)
30856 + pfm_resume_task(task, resume);
30857 +
30858 + pfm_release_ctx_from_fd(&cookie);
30859 + return ret;
30860 +}
30861 +
30862 +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq)
30863 +{
30864 + struct pfm_context *ctx;
30865 + struct task_struct *task;
30866 + struct pfm_syscall_cookie cookie;
30867 + void *resume = NULL, *dummy_resume; /* resume is set only via pfm_get_task() */
30868 + unsigned long flags;
30869 + struct pfarg_load req;
30870 + int ret;
30871 +
30872 + PFM_DBG("fd=%d req=%p", fd, ureq);
30873 +
30874 + if (copy_from_user(&req, ureq, sizeof(req)))
30875 + return -EFAULT;
30876 +
30877 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30878 + if (ret)
30879 + return ret;
30880 +
30881 + task = current;
30882 +
30883 + /*
30884 + * in per-thread mode (not self-monitoring), get a reference
30885 + * on task to monitor. This must be done with interrupts enabled
30886 + * Upon successful return, refcount on task is increased.
30887 + *
30888 + * fget_light() is protecting the context.
30889 + */
30890 + if (!ctx->flags.system && req.load_pid != current->pid) {
30891 + ret = pfm_get_task(ctx, req.load_pid, &task, &resume);
30892 + if (ret)
30893 + goto error;
30894 + }
30895 +
30896 + /*
30897 + * irqsave is required to avoid race in case context is already
30898 + * loaded or with switch timeout in the case of self-monitoring
30899 + */
30900 + spin_lock_irqsave(&ctx->lock, flags);
30901 +
30902 + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &dummy_resume);
30903 + if (!ret)
30904 + ret = __pfm_load_context(ctx, &req, task);
30905 +
30906 + spin_unlock_irqrestore(&ctx->lock, flags);
30907 +
30908 + if (resume)
30909 + pfm_resume_task(task, resume);
30910 +
30911 + /*
30912 + * in per-thread mode (not self-monitoring), we need
30913 + * to decrease refcount on task to monitor:
30914 + * - load successful: we have a reference to the task in ctx->task
30915 + * - load failed : undo the effect of pfm_get_task()
30916 + */
30917 + if (task != current)
30918 + put_task_struct(task);
30919 +error:
30920 + pfm_release_ctx_from_fd(&cookie);
30921 + return ret;
30922 +}
30923 +/* Unload the perfmon context referenced by fd; returns the status of the helpers it calls. */
30924 +asmlinkage long sys_pfm_unload_context(int fd)
30925 +{
30926 + struct pfm_context *ctx;
30927 + struct task_struct *task;
30928 + struct pfm_syscall_cookie cookie;
30929 + void *resume; /* NOTE(review): assumes pfm_check_task_state() always writes it -- confirm */
30930 + unsigned long flags;
30931 + int ret;
30932 + int is_system, release_info = 0; /* release_info: bit flags filled in by __pfm_unload_context() */
30933 + u32 cpu;
30934 +
30935 + PFM_DBG("fd=%d", fd);
30936 +
30937 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30938 + if (ret)
30939 + return ret;
30940 +
30941 + is_system = ctx->flags.system;
30942 +
30943 + spin_lock_irqsave(&ctx->lock, flags);
30944 +
30945 + cpu = ctx->cpu; /* read under the lock, before the unload; used by the session release below */
30946 + task = ctx->task;
30947 +
30948 + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED|PFM_CMD_UNLOAD,
30949 + &flags, &resume);
30950 + if (!ret)
30951 + ret = __pfm_unload_context(ctx, &release_info);
30952 +
30953 + spin_unlock_irqrestore(&ctx->lock, flags);
30954 +
30955 + if (resume)
30956 + pfm_resume_task(task, resume);
30957 +
30958 + /*
30959 + * cancel timer now that context is unlocked
30960 + * avoid race with pfm_handle_switch_timeout()
30961 + */
30962 + if (release_info & 0x2) {
30963 + int r;
30964 + r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
30965 + PFM_DBG("timeout cancel=%d", r);
30966 + }
30967 +
30968 + if (release_info & 0x1) /* bit 0x1 requests releasing the session */
30969 + pfm_session_release(is_system, cpu);
30970 +
30971 + pfm_release_ctx_from_fd(&cookie);
30972 + return ret;
30973 +}
30974 +/* Create event sets in the context behind fd from the count-element user array ureq. */
30975 +asmlinkage long sys_pfm_create_evtsets(int fd, struct pfarg_setdesc __user *ureq, int count)
30976 +{
30977 + struct pfm_context *ctx;
30978 + struct pfm_syscall_cookie cookie;
30979 + struct pfarg_setdesc *req;
30980 + void *fptr, *resume;
30981 + unsigned long flags;
30982 + size_t sz;
30983 + int ret;
30984 +
30985 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
30986 +
30987 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
30988 + return -EINVAL; /* negative or too-large element count */
30989 +
30990 + sz = count*sizeof(*ureq); /* size in bytes of the user array */
30991 +
30992 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
30993 + if (ret)
30994 + return ret;
30995 +
30996 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
30997 + if (ret)
30998 + goto error;
30999 +
31000 + /*
31001 + * must mask interrupts because we do not know the state of context,
31002 + * could be attached and we could be getting PMU interrupts. So
31003 + * we mask and lock context and we check and possibly relax masking
31004 + */
31005 + spin_lock_irqsave(&ctx->lock, flags);
31006 +
31007 + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume);
31008 + if (!ret)
31009 + ret = __pfm_create_evtsets(ctx, req, count);
31010 +
31011 + spin_unlock_irqrestore(&ctx->lock, flags);
31012 + /*
31013 + * context must be unloaded for this command. The resume pointer
31014 + * is necessarily NULL, thus no need to call pfm_resume_task()
31015 + */
31016 + kfree(fptr); /* pointer handed back by pfm_get_args() */
31017 +
31018 +error:
31019 + pfm_release_ctx_from_fd(&cookie);
31020 + return ret;
31021 +}
31022 +/* Fill the count-element user array ureq with event set information from the context behind fd. */
31023 +asmlinkage long sys_pfm_getinfo_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count)
31024 +{
31025 + struct pfm_context *ctx;
31026 + struct task_struct *task;
31027 + struct pfm_syscall_cookie cookie;
31028 + struct pfarg_setinfo *req;
31029 + void *fptr, *resume;
31030 + unsigned long flags;
31031 + size_t sz;
31032 + int ret;
31033 +
31034 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
31035 +
31036 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
31037 + return -EINVAL; /* negative or too-large element count */
31038 +
31039 + sz = count*sizeof(*ureq); /* size in bytes of the user array */
31040 +
31041 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
31042 + if (ret)
31043 + return ret;
31044 +
31045 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
31046 + if (ret)
31047 + goto error;
31048 +
31049 + /*
31050 + * this command operates even when context is loaded, so we need
31051 + * to keep interrupts masked to avoid a race with PMU interrupt
31052 + * which may switch the active set
31053 + */
31054 + spin_lock_irqsave(&ctx->lock, flags);
31055 +
31056 + task = ctx->task;
31057 +
31058 + ret = pfm_check_task_state(ctx, 0, &flags, &resume);
31059 + if (!ret)
31060 + ret = __pfm_getinfo_evtsets(ctx, req, count);
31061 +
31062 + spin_unlock_irqrestore(&ctx->lock, flags);
31063 +
31064 + if (resume)
31065 + pfm_resume_task(task, resume);
31066 +
31067 + if (copy_to_user(ureq, req, sz)) /* NOTE(review): done even if ret is already an error */
31068 + ret = -EFAULT; /* replaces any earlier status */
31069 +
31070 + kfree(fptr); /* pointer handed back by pfm_get_args() */
31071 +error:
31072 + pfm_release_ctx_from_fd(&cookie);
31073 + return ret;
31074 +}
31075 +/* Delete the event sets listed in the count-element user array ureq from the context behind fd. */
31076 +asmlinkage long sys_pfm_delete_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count)
31077 +{
31078 + struct pfm_context *ctx;
31079 + struct pfm_syscall_cookie cookie;
31080 + struct pfarg_setinfo *req;
31081 + void *fptr, *resume;
31082 + unsigned long flags;
31083 + size_t sz;
31084 + int ret;
31085 +
31086 + PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
31087 +
31088 + if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
31089 + return -EINVAL; /* negative or too-large element count */
31090 +
31091 + sz = count*sizeof(*ureq); /* size in bytes of the user array */
31092 +
31093 + ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
31094 + if (ret)
31095 + return ret;
31096 +
31097 + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
31098 + if (ret)
31099 + goto error;
31100 +
31101 + /*
31102 + * must mask interrupts because we do not know the state of context,
31103 + * could be attached and we could be getting PMU interrupts
31104 + */
31105 + spin_lock_irqsave(&ctx->lock, flags);
31106 +
31107 + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume);
31108 + if (!ret)
31109 + ret = __pfm_delete_evtsets(ctx, req, count);
31110 +
31111 + spin_unlock_irqrestore(&ctx->lock, flags);
31112 + /*
31113 + * context must be unloaded for this command. The resume pointer
31114 + * is necessarily NULL, thus no need to call pfm_resume_task()
31115 + */
31116 + kfree(fptr); /* pointer handed back by pfm_get_args() */
31117 +
31118 +error:
31119 + pfm_release_ctx_from_fd(&cookie);
31120 + return ret;
31121 +}
31122 diff --git a/perfmon/perfmon_sysfs.c b/perfmon/perfmon_sysfs.c
31123 new file mode 100644
31124 index 0000000..7353c3b
31125 --- /dev/null
31126 +++ b/perfmon/perfmon_sysfs.c
31127 @@ -0,0 +1,525 @@
31128 +/*
31129 + * perfmon_sysfs.c: perfmon2 sysfs interface
31130 + *
31131 + * This file implements the perfmon2 interface which
31132 + * provides access to the hardware performance counters
31133 + * of the host processor.
31134 + *
31135 + * The initial version of perfmon.c was written by
31136 + * Ganesh Venkitachalam, IBM Corp.
31137 + *
31138 + * Then it was modified for perfmon-1.x by Stephane Eranian and
31139 + * David Mosberger, Hewlett Packard Co.
31140 + *
31141 + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
31142 + * by Stephane Eranian, Hewlett Packard Co.
31143 + *
31144 + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
31145 + * Contributed by Stephane Eranian <eranian@hpl.hp.com>
31146 + * David Mosberger-Tang <davidm@hpl.hp.com>
31147 + *
31148 + * More information about perfmon available at:
31149 + * http://perfmon2.sf.net
31150 + *
31151 + * This program is free software; you can redistribute it and/or
31152 + * modify it under the terms of version 2 of the GNU General Public
31153 + * License as published by the Free Software Foundation.
31154 + *
31155 + * This program is distributed in the hope that it will be useful,
31156 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
31157 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31158 + * General Public License for more details.
31159 + *
31160 + * You should have received a copy of the GNU General Public License
31161 + * along with this program; if not, write to the Free Software
31162 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31163 + * 02111-1307 USA
31164 + */
31165 +#include <linux/kernel.h>
31166 +#include <linux/module.h> /* for EXPORT_SYMBOL */
31167 +#include <linux/perfmon_kern.h>
31168 +#include "perfmon_priv.h"
31169 +
31170 +struct pfm_attribute {
31171 + struct attribute attr; /* embedded sysfs attribute; to_attr() maps it back to this struct */
31172 + ssize_t (*show)(void *, struct pfm_attribute *attr, char *); /* object, attribute, output buffer */
31173 + ssize_t (*store)(void *, const char *, size_t);
31174 +};
31175 +#define to_attr(n) container_of(n, struct pfm_attribute, attr) /* no trailing ';': usable as an expression */
31176 +
31177 +#define PFM_RO_ATTR(_name, _show) \
31178 + struct kobj_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
31179 +
31180 +#define PFM_RW_ATTR(_name, _show, _store) \
31181 + struct kobj_attribute attr_##_name = __ATTR(_name, 0644, _show, _store)
31182 +
31183 +#define PFM_ROS_ATTR(_name, _show) \
31184 + struct pfm_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
31185 +
31186 +#define is_attr_name(a, n) (!strcmp((a)->attr.name, n))
31187 +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
31188 +
31189 +static struct kobject *pfm_kernel_kobj, *pfm_fmt_kobj;
31190 +static struct kobject *pfm_pmu_kobj;
31191 +/* sysfs_ops show for register kobjects: forwards to the pfm_attribute's show(), -EIO if it has none. */
31192 +static ssize_t pfm_regs_attr_show(struct kobject *kobj,
31193 + struct attribute *attr, char *buf)
31194 +{
31195 + struct pfm_regmap_desc *reg = to_reg(kobj);
31196 + struct pfm_attribute *attribute = to_attr(attr);
31197 + return attribute->show ? attribute->show(reg, attribute, buf) : -EIO;
31198 +}
31199 +/* sysfs_ops show for sampling format kobjects: forwards to the pfm_attribute's show(), -EIO if it has none. */
31200 +static ssize_t pfm_fmt_attr_show(struct kobject *kobj,
31201 + struct attribute *attr, char *buf)
31202 +{
31203 + struct pfm_smpl_fmt *fmt = to_smpl_fmt(kobj);
31204 + struct pfm_attribute *attribute = to_attr(attr);
31205 + return attribute->show ? attribute->show(fmt, attribute, buf) : -EIO;
31206 +}
31207 +
31208 +static struct sysfs_ops pfm_regs_sysfs_ops = {
31209 + .show = pfm_regs_attr_show
31210 +};
31211 +
31212 +static struct sysfs_ops pfm_fmt_sysfs_ops = {
31213 + .show = pfm_fmt_attr_show
31214 +};
31215 +
31216 +static struct kobj_type pfm_regs_ktype = {
31217 + .sysfs_ops = &pfm_regs_sysfs_ops,
31218 +};
31219 +
31220 +static struct kobj_type pfm_fmt_ktype = {
31221 + .sysfs_ops = &pfm_fmt_sysfs_ops,
31222 +};
31223 +/* show handler shared by the /sys/kernel/perfmon attributes; picks the value to print from the attribute name. */
31224 +static ssize_t pfm_controls_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
31225 +{
31226 + int base;
31227 +
31228 + if (is_attr_name(attr, "version"))
31229 + return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN);
31230 +
31231 + if (is_attr_name(attr, "task_sessions_count"))
31232 + return pfm_sysfs_res_show(buf, PAGE_SIZE, 0); /* last argument selects the resource: 0 here */
31233 +
31234 + if (is_attr_name(attr, "debug"))
31235 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug);
31236 +
31237 + if (is_attr_name(attr, "task_group"))
31238 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group);
31239 +
31240 + if (is_attr_name(attr, "mode"))
31241 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.flags);
31242 +
31243 + if (is_attr_name(attr, "arg_mem_max"))
31244 + return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_mem_max);
31245 +
31246 + if (is_attr_name(attr, "syscall")) {
31247 + base = pfm_arch_get_base_syscall();
31248 + return snprintf(buf, PAGE_SIZE, "%d\n", base);
31249 + }
31250 +
31251 + if (is_attr_name(attr, "sys_sessions_count"))
31252 + return pfm_sysfs_res_show(buf, PAGE_SIZE, 1); /* resource selector 1 */
31253 +
31254 + if (is_attr_name(attr, "smpl_buffer_mem_max"))
31255 + return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buffer_mem_max);
31256 +
31257 + if (is_attr_name(attr, "smpl_buffer_mem_cur"))
31258 + return pfm_sysfs_res_show(buf, PAGE_SIZE, 2); /* resource selector 2 */
31259 +
31260 + if (is_attr_name(attr, "sys_group"))
31261 + return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group);
31262 +
31263 + /* XXX: could be set to write-only; a read always yields "0" */
31264 + if (is_attr_name(attr, "reset_stats")) {
31265 + buf[0] = '0';
31266 + buf[1] = '\0';
31267 + return strnlen(buf, PAGE_SIZE);
31268 + }
31269 + return 0; /* unrecognized attribute name: nothing is written to buf */
31270 +}
31271 +/* store handler shared by the writable /sys/kernel/perfmon attributes; returns count on every path. */
31272 +static ssize_t pfm_controls_store(struct kobject *kobj, struct kobj_attribute *attr,
31273 + const char *buf, size_t count)
31274 +{
31275 + int i;
31276 + size_t d;
31277 +
31278 + if (sscanf(buf, "%zu", &d) != 1)
31279 + goto skip; /* NOTE(review): unparsable input is dropped but the write still reports success */
31280 +
31281 + if (is_attr_name(attr, "debug"))
31282 + pfm_controls.debug = d;
31283 +
31284 + if (is_attr_name(attr, "task_group"))
31285 + pfm_controls.task_group = d;
31286 +
31287 + if (is_attr_name(attr, "sys_group"))
31288 + pfm_controls.sys_group = d;
31289 +
31290 + if (is_attr_name(attr, "mode"))
31291 + pfm_controls.flags = d ? PFM_CTRL_FL_RW_EXPERT : 0; /* any non-zero value sets the flag */
31292 +
31293 + if (is_attr_name(attr, "arg_mem_max")) {
31294 + /*
31295 + * we impose a page as the minimum; smaller values are ignored.
31296 + *
31297 + * This limit may be smaller than the stack buffer
31298 + * available and that is fine.
31299 + */
31300 + if (d >= PAGE_SIZE)
31301 + pfm_controls.arg_mem_max = d;
31302 + }
31303 + if (is_attr_name(attr, "reset_stats")) { /* the written value itself is not used */
31304 + for_each_online_cpu(i) {
31305 + pfm_reset_stats(i);
31306 + }
31307 + }
31308 +
31309 + if (is_attr_name(attr, "smpl_buffer_mem_max")) {
31310 + if (d >= PAGE_SIZE) /* values below one page are ignored */
31311 + pfm_controls.smpl_buffer_mem_max = d;
31312 + }
31313 +skip:
31314 + return count;
31315 +}
31316 +
31317 +/*
31318 + * /sys/kernel/perfmon attributes
31319 + */
31320 +static PFM_RO_ATTR(version, pfm_controls_show);
31321 +static PFM_RO_ATTR(task_sessions_count, pfm_controls_show);
31322 +static PFM_RO_ATTR(syscall, pfm_controls_show);
31323 +static PFM_RO_ATTR(sys_sessions_count, pfm_controls_show);
31324 +static PFM_RO_ATTR(smpl_buffer_mem_cur, pfm_controls_show);
31325 +
31326 +static PFM_RW_ATTR(debug, pfm_controls_show, pfm_controls_store);
31327 +static PFM_RW_ATTR(task_group, pfm_controls_show, pfm_controls_store);
31328 +static PFM_RW_ATTR(mode, pfm_controls_show, pfm_controls_store);
31329 +static PFM_RW_ATTR(sys_group, pfm_controls_show, pfm_controls_store);
31330 +static PFM_RW_ATTR(arg_mem_max, pfm_controls_show, pfm_controls_store);
31331 +static PFM_RW_ATTR(smpl_buffer_mem_max, pfm_controls_show, pfm_controls_store);
31332 +static PFM_RW_ATTR(reset_stats, pfm_controls_show, pfm_controls_store);
31333 +
31334 +static struct attribute *pfm_kernel_attrs[] = {
31335 + &attr_version.attr,
31336 + &attr_syscall.attr,
31337 + &attr_task_sessions_count.attr,
31338 + &attr_sys_sessions_count.attr,
31339 + &attr_smpl_buffer_mem_cur.attr,
31340 + &attr_debug.attr,
31341 + &attr_reset_stats.attr,
31342 + &attr_sys_group.attr,
31343 + &attr_task_group.attr,
31344 + &attr_mode.attr,
31345 + &attr_smpl_buffer_mem_max.attr,
31346 + &attr_arg_mem_max.attr,
31347 + NULL
31348 +};
31349 +
31350 +static struct attribute_group pfm_kernel_attr_group = {
31351 + .attrs = pfm_kernel_attrs,
31352 +};
31353 +
31354 +/*
31355 + * per-reg attributes: pfm_reg_show() prints one field of the pfm_regmap_desc passed as data
31356 + */
31357 +static ssize_t pfm_reg_show(void *data, struct pfm_attribute *attr, char *buf)
31358 +{
31359 + struct pfm_regmap_desc *reg;
31360 + int w;
31361 +
31362 + reg = data;
31363 +
31364 + if (is_attr_name(attr, "name"))
31365 + return snprintf(buf, PAGE_SIZE, "%s\n", reg->desc);
31366 +
31367 + if (is_attr_name(attr, "dfl_val"))
31368 + return snprintf(buf, PAGE_SIZE, "0x%llx\n",
31369 + (unsigned long long)reg->dfl_val);
31370 +
31371 + if (is_attr_name(attr, "width")) { /* PFM_REG_C64 registers report counter_width, all others 64 */
31372 + w = (reg->type & PFM_REG_C64) ?
31373 + pfm_pmu_conf->counter_width : 64;
31374 + return snprintf(buf, PAGE_SIZE, "%d\n", w);
31375 + }
31376 +
31377 + if (is_attr_name(attr, "rsvd_msk"))
31378 + return snprintf(buf, PAGE_SIZE, "0x%llx\n",
31379 + (unsigned long long)reg->rsvd_msk);
31380 +
31381 + if (is_attr_name(attr, "addr"))
31382 + return snprintf(buf, PAGE_SIZE, "0x%lx\n", reg->hw_addr);
31383 +
31384 + return 0; /* unrecognized attribute name: nothing is written to buf */
31385 +}
31386 +
31387 +static PFM_ROS_ATTR(name, pfm_reg_show);
31388 +static PFM_ROS_ATTR(dfl_val, pfm_reg_show);
31389 +static PFM_ROS_ATTR(rsvd_msk, pfm_reg_show);
31390 +static PFM_ROS_ATTR(width, pfm_reg_show);
31391 +static PFM_ROS_ATTR(addr, pfm_reg_show);
31392 +
31393 +static struct attribute *pfm_reg_attrs[] = {
31394 + &attr_name.attr,
31395 + &attr_dfl_val.attr,
31396 + &attr_rsvd_msk.attr,
31397 + &attr_width.attr,
31398 + &attr_addr.attr,
31399 + NULL
31400 +};
31401 +
31402 +static struct attribute_group pfm_reg_attr_group = {
31403 + .attrs = pfm_reg_attrs,
31404 +};
31405 +/* show handler for the pmu_desc attributes; only "model" is handled and prints pfm_pmu_conf->pmu_name. */
31406 +static ssize_t pfm_pmu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
31407 +{
31408 + if (is_attr_name(attr, "model"))
31409 + return snprintf(buf, PAGE_SIZE, "%s\n", pfm_pmu_conf->pmu_name);
31410 + return 0;
31411 +}
31412 +static PFM_RO_ATTR(model, pfm_pmu_show);
31413 +
31414 +static struct attribute *pfm_pmu_desc_attrs[] = {
31415 + &attr_model.attr,
31416 + NULL
31417 +};
31418 +
31419 +static struct attribute_group pfm_pmu_desc_attr_group = {
31420 + .attrs = pfm_pmu_desc_attrs,
31421 +};
31422 +/* Add a pmcN/pmdN kobject with pfm_reg_attr_group under pfm_pmu_kobj for each PFM_REG_I register of pmu. */
31423 +static int pfm_sysfs_add_pmu_regs(struct pfm_pmu_config *pmu)
31424 +{
31425 + struct pfm_regmap_desc *reg;
31426 + unsigned int i, k;
31427 + int ret;
31428 +
31429 + reg = pmu->pmc_desc;
31430 + for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
31431 +
31432 + if (!(reg->type & PFM_REG_I))
31433 + continue; /* registers without PFM_REG_I get no sysfs entry */
31434 +
31435 + ret = kobject_init_and_add(&reg->kobj, &pfm_regs_ktype,
31436 + pfm_pmu_kobj, "pmc%u", i);
31437 + if (ret)
31438 + goto undo_pmcs;
31439 +
31440 + ret = sysfs_create_group(&reg->kobj, &pfm_reg_attr_group);
31441 + if (ret) {
31442 + kobject_del(&reg->kobj);
31443 + goto undo_pmcs;
31444 + }
31445 + }
31446 +
31447 + reg = pmu->pmd_desc;
31448 + for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
31449 +
31450 + if (!(reg->type & PFM_REG_I))
31451 + continue;
31452 +
31453 + ret = kobject_init_and_add(&reg->kobj, &pfm_regs_ktype,
31454 + pfm_pmu_kobj, "pmd%u", i);
31455 + if (ret)
31456 + goto undo_pmds;
31457 +
31458 + ret = sysfs_create_group(&reg->kobj, &pfm_reg_attr_group);
31459 + if (ret) {
31460 + kobject_del(&reg->kobj);
31461 + goto undo_pmds;
31462 + }
31463 + }
31464 + return 0;
31465 +undo_pmds:
31466 + reg = pmu->pmd_desc;
31467 + for (k = 0; k < i; k++, reg++) { /* i is the index of the PMD that failed */
31468 + if (!(reg->type & PFM_REG_I))
31469 + continue;
31470 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31471 + kobject_del(&reg->kobj);
31472 + }
31473 + i = pmu->num_pmc_entries; /* the PMC loop completed, so unwind every PMC below */
31474 + /* fall through */
31475 +undo_pmcs:
31476 + reg = pmu->pmc_desc;
31477 + for (k = 0; k < i; k++, reg++) { /* entries [0, i) were added before the failure */
31478 + if (!(reg->type & PFM_REG_I))
31479 + continue;
31480 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31481 + kobject_del(&reg->kobj);
31482 + }
31483 + return ret;
31484 +}
31485 +/* Remove the attribute group and kobject of every PFM_REG_I PMC and PMD of pmu; always returns 0. */
31486 +static int pfm_sysfs_del_pmu_regs(struct pfm_pmu_config *pmu)
31487 +{
31488 + struct pfm_regmap_desc *reg;
31489 + unsigned int i;
31490 +
31491 + reg = pmu->pmc_desc;
31492 + for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
31493 +
31494 + if (!(reg->type & PFM_REG_I))
31495 + continue; /* same filter as pfm_sysfs_add_pmu_regs() */
31496 +
31497 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31498 + kobject_del(&reg->kobj);
31499 + }
31500 +
31501 + reg = pmu->pmd_desc;
31502 + for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
31503 +
31504 + if (!(reg->type & PFM_REG_I))
31505 + continue;
31506 +
31507 + sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
31508 + kobject_del(&reg->kobj);
31509 + }
31510 + return 0;
31511 +}
31512 +
31513 +/*
31514 + * when a PMU description module is inserted, we create a pmu_desc
31515 + * subdir in sysfs and populate it with PMU specific information
31516 + * (model attribute, register mappings). Returns 0 or an error code.
31517 + */
31518 +int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu)
31519 +{
31520 + int ret;
31521 +
31522 + pfm_pmu_kobj = kobject_create_and_add("pmu_desc", pfm_kernel_kobj);
31523 + if (!pfm_pmu_kobj)
31524 + return -ENOMEM;
31525 +
31526 + ret = sysfs_create_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
31527 + if (ret) {
31528 + /* will release pfm_pmu_kobj */
31529 + kobject_put(pfm_pmu_kobj);
31530 + return ret;
31531 + }
31532 +
31533 + ret = pfm_sysfs_add_pmu_regs(pmu);
31534 + if (ret) {
31535 + sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
31536 + /* will release pfm_pmu_kobj */
31537 + kobject_put(pfm_pmu_kobj);
31538 + } else
31539 + kobject_uevent(pfm_pmu_kobj, KOBJ_ADD); /* announced only once fully populated */
31540 +
31541 + return ret;
31542 +}
31543 +
31544 +/*
31545 + * when a PMU description module is removed, we also remove
31546 + * all its information from sysfs, i.e., the pmu_desc subdir
31547 + * disappears. Always returns 0.
31548 + */
31549 +int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu)
31550 +{
31551 + pfm_sysfs_del_pmu_regs(pmu);
31552 + sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
31553 + kobject_uevent(pfm_pmu_kobj, KOBJ_REMOVE);
31554 + kobject_put(pfm_pmu_kobj);
31555 + pfm_pmu_kobj = NULL; /* do not keep a pointer to the kobject just put */
31556 + return 0;
31557 +}
31558 +/* show handler for sampling format attributes; only "version" is handled, printed as "high16.low16". */
31559 +static ssize_t pfm_fmt_show(void *data, struct pfm_attribute *attr, char *buf)
31560 +{
31561 + struct pfm_smpl_fmt *fmt = data;
31562 +
31563 + if (is_attr_name(attr, "version"))
31564 + return snprintf(buf, PAGE_SIZE, "%u.%u\n",
31565 + fmt->fmt_version >> 16 & 0xffff,
31566 + fmt->fmt_version & 0xffff);
31567 + return 0;
31568 +}
31569 +
31570 +/*
31571 + * do not use predefined macros because of name conflict
31572 + * with /sys/kernel/perfmon/version
31573 + */
31574 +struct pfm_attribute attr_fmt_version = {
31575 + .attr = { .name = "version", .mode = 0444 },
31576 + .show = pfm_fmt_show,
31577 +};
31578 +
31579 +static struct attribute *pfm_fmt_attrs[] = {
31580 + &attr_fmt_version.attr,
31581 + NULL
31582 +};
31583 +
31584 +static struct attribute_group pfm_fmt_attr_group = {
31585 + .attrs = pfm_fmt_attrs,
31586 +};
31587 +
31588 +/*
31589 + * when a sampling format module is inserted, we add a directory named
31590 + * fmt->fmt_name under pfm_fmt_kobj holding its "version" attribute
31591 + */
31592 +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt)
31593 +{
31594 + int ret;
31595 +
31596 + ret = kobject_init_and_add(&fmt->kobj, &pfm_fmt_ktype,
31597 + pfm_fmt_kobj, "%s", fmt->fmt_name); /* name is data, never a format string */
31598 + if (ret)
31599 + return ret;
31600 +
31601 + ret = sysfs_create_group(&fmt->kobj, &pfm_fmt_attr_group);
31602 + if (ret)
31603 + kobject_del(&fmt->kobj);
31604 + else
31605 + kobject_uevent(&fmt->kobj, KOBJ_ADD);
31606 +
31607 + return ret;
31608 +}
31609 +
31610 +/*
31611 + * when a sampling format module is removed, its information
31612 + * must also be removed from sysfs (mirror of pfm_sysfs_add_fmt())
31613 + */
31614 +void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
31615 +{
31616 + sysfs_remove_group(&fmt->kobj, &pfm_fmt_attr_group);
31617 + kobject_uevent(&fmt->kobj, KOBJ_REMOVE);
31618 + kobject_del(&fmt->kobj);
31619 +}
31620 +/* Create /sys/kernel/perfmon, its attribute files and the "formats" subdir; returns 0 or a negative errno. */
31621 +int __init pfm_init_sysfs(void)
31622 +{
31623 + int ret;
31624 +
31625 + pfm_kernel_kobj = kobject_create_and_add("perfmon", kernel_kobj);
31626 + if (!pfm_kernel_kobj) {
31627 + PFM_ERR("cannot add kernel object: /sys/kernel/perfmon");
31628 + return -ENOMEM;
31629 + }
31630 +
31631 + ret = sysfs_create_group(pfm_kernel_kobj, &pfm_kernel_attr_group);
31632 + if (ret) {
31633 + kobject_put(pfm_kernel_kobj);
31634 + return ret;
31635 + }
31636 +
31637 + pfm_fmt_kobj = kobject_create_and_add("formats", pfm_kernel_kobj);
31638 + if (!pfm_fmt_kobj) { /* ret is still 0 here, so the pointer must be tested */
31639 + ret = -ENOMEM;
31640 + PFM_ERR("cannot add fmt object: %d", ret);
31641 + goto error_fmt;
31642 + }
31643 + if (pfm_pmu_conf)
31644 + pfm_sysfs_add_pmu(pfm_pmu_conf); /* failure here is not treated as fatal */
31645 +
31646 + pfm_sysfs_builtin_fmt_add();
31647 + return 0;
31648 +
31649 +error_fmt:
31650 + kobject_put(pfm_kernel_kobj); /* drop our reference, as in the error path above, so the kobject is freed */
31651 + return ret;
31652 +}
This page took 1.439863 seconds and 5 git commands to generate.