| /**************************************************************************** |
| ** |
| ** Copyright (C) 2016 Intel Corporation. |
| ** Contact: https://www.qt.io/licensing/ |
| ** |
| ** This file is part of the QtCore module of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL$ |
| ** Commercial License Usage |
| ** Licensees holding valid commercial Qt licenses may use this file in |
| ** accordance with the commercial license agreement provided with the |
| ** Software or, alternatively, in accordance with the terms contained in |
| ** a written agreement between you and The Qt Company. For licensing terms |
| ** and conditions see https://www.qt.io/terms-conditions. For further |
| ** information use the contact form at https://www.qt.io/contact-us. |
| ** |
| ** GNU Lesser General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU Lesser |
| ** General Public License version 3 as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| ** packaging of this file. Please review the following information to |
| ** ensure the GNU Lesser General Public License version 3 requirements |
| ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| ** |
| ** GNU General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU |
| ** General Public License version 2.0 or (at your option) the GNU General |
| ** Public license version 3 or any later version approved by the KDE Free |
| ** Qt Foundation. The licenses are as published by the Free Software |
| ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| ** included in the packaging of this file. Please review the following |
| ** information to ensure the GNU General Public License requirements will |
| ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| ** https://www.gnu.org/licenses/gpl-3.0.html. |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| #include "qbenchmarkperfevents_p.h" |
| #include "qbenchmarkmetric.h" |
| #include "qbenchmark_p.h" |
| |
| #ifdef QTESTLIB_USE_PERF_EVENTS |
| |
| // include the qcore_unix_p.h without core-private |
| // we only use inline functions anyway |
| #include "../corelib/kernel/qcore_unix_p.h" |
| |
| #include <sys/types.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <string.h> |
| #include <stdio.h> |
| |
| #include <sys/syscall.h> |
| #include <sys/ioctl.h> |
| |
| #include "3rdparty/linux_perf_event_p.h" |
| |
| // for PERF_TYPE_HW_CACHE, the config is a bitmask |
| // lowest 8 bits: cache type |
| // bits 8 to 15: cache operation |
| // bits 16 to 23: cache result |
| #define CACHE_L1D_READ (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_L1D_WRITE (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_L1D_PREFETCH (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_L1I_READ (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_L1I_PREFETCH (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_LLC_READ (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_LLC_WRITE (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_WRITE << 8| PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_LLC_PREFETCH (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_L1D_READ_MISS (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_L1D_WRITE_MISS (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_L1D_PREFETCH_MISS (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_L1I_READ_MISS (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_L1I_PREFETCH_MISS (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_LLC_READ_MISS (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_LLC_WRITE_MISS (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_LLC_PREFETCH_MISS (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| #define CACHE_BRANCH_READ (PERF_COUNT_HW_CACHE_BPU | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) |
| #define CACHE_BRANCH_READ_MISS (PERF_COUNT_HW_CACHE_BPU | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
| |
| QT_BEGIN_NAMESPACE |
| |
| static perf_event_attr attr; |
| |
| static void initPerf() |
| { |
| static bool done; |
| if (!done) { |
| memset(&attr, 0, sizeof attr); |
| attr.size = sizeof attr; |
| attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; |
| attr.disabled = true; // we'll enable later |
| attr.inherit = true; // let children processes inherit the monitoring |
| attr.pinned = true; // keep it running in the hardware |
| attr.inherit_stat = true; // aggregate all the info from child processes |
| attr.task = true; // trace fork/exits |
| |
| // set a default performance counter: CPU cycles |
| attr.type = PERF_TYPE_HARDWARE; |
| attr.config = PERF_COUNT_HW_CPU_CYCLES; // default |
| |
| done = true; |
| } |
| } |
| |
| // This class does not exist in the API so it's qdoc comment marker was removed. |
| |
| /* |
| \class QBenchmarkPerfEvents |
| \brief The Linux perf events benchmark backend |
| |
| This benchmark backend uses the Linux Performance Counters interface, |
| introduced with the Linux kernel v2.6.31. The interface is done by one |
| system call (perf_event_open) which takes an attribute structure and |
| returns a file descriptor. |
| |
| More information: |
| \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> |
| \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> |
| (note: as of v3.3.1, the documentation is out-of-date with the kernel |
| interface, so reading the source code of existing tools is necessary) |
| |
| This benchlib backend monitors the current process as well as child process |
| launched. We do not try to benchmark in kernel or hypervisor mode, as that |
| usually requires elevated privileges. |
| */ |
| |
| static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) |
| { |
| #ifdef SYS_perf_event_open |
| return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); |
| #else |
| Q_UNUSED(attr); |
| Q_UNUSED(pid); |
| Q_UNUSED(cpu); |
| Q_UNUSED(group_fd); |
| Q_UNUSED(flags); |
| errno = ENOSYS; |
| return -1; |
| #endif |
| } |
| |
| bool QBenchmarkPerfEventsMeasurer::isAvailable() |
| { |
| // this generates an EFAULT because attr == NULL if perf_event_open is available |
| // if the kernel is too old, it generates ENOSYS |
| return perf_event_open(nullptr, 0, 0, 0, 0) == -1 && errno != ENOSYS; |
| } |
| |
| /* Event list structure |
| The following table provides the list of supported events |
| |
| Event type Event counter Unit Name and aliases |
| HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles |
| HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles |
| HARDWARE INSTRUCTIONS Instructions instructions |
| HARDWARE CACHE_REFERENCES CacheReferences cache-references |
| HARDWARE CACHE_MISSES CacheMisses cache-misses |
| HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches |
| HARDWARE BRANCH_MISSES BranchMisses branch-misses |
| HARDWARE BUS_CYCLES BusCycles bus-cycles |
| HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend |
| HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend |
| SOFTWARE CPU_CLOCK WalltimeMilliseconds cpu-clock |
| SOFTWARE TASK_CLOCK WalltimeMilliseconds task-clock |
| SOFTWARE PAGE_FAULTS PageFaults page-faults faults |
| SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults |
| SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults |
| SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs |
| SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations |
| SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults |
| SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults |
| HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads |
| HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores |
| HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches |
| HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads |
| HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches |
| HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads |
| HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores |
| HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches |
| HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses |
| HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses |
| HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses |
| HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses |
| HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses |
| HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses |
| HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses |
| HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses |
| HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts |
| HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses |
| |
| Use the following Perl script to re-generate the list |
| === cut perl === |
| #!/usr/bin/env perl |
| # Load all entries into %map |
| while (<STDIN>) { |
| m/^\s*(.*)\s*$/; |
| @_ = split /\s+/, $1; |
| $type = shift @_; |
| $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : |
| $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : |
| $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; |
| $unit = shift @_; |
| |
| for $string (@_) { |
| die "$string was already seen!" if defined($map{$string}); |
| $map{$string} = [-1, $type, $id, $unit]; |
| push @strings, $string; |
| } |
| } |
| |
| # sort the map and print the string list |
| @strings = sort @strings; |
| print "static const char eventlist_strings[] = \n"; |
| $counter = 0; |
| for $entry (@strings) { |
| print " \"$entry\\0\"\n"; |
| $map{$entry}[0] = $counter; |
| $counter += 1 + length $entry; |
| } |
| |
| # print the table |
| print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; |
| for $entry (sort @strings) { |
| printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", |
| $map{$entry}[0], |
| $map{$entry}[1], |
| $map{$entry}[2], |
| $map{$entry}[3]; |
| } |
| print " { 0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n"; |
| === cut perl === |
| */ |
| |
| struct Events { |
| unsigned offset; |
| quint32 type; |
| quint64 event_id; |
| QTest::QBenchmarkMetric metric; |
| }; |
| |
| /* -- BEGIN GENERATED CODE -- */ |
| static const char eventlist_strings[] = |
| "alignment-faults\0" |
| "branch-instructions\0" |
| "branch-load-misses\0" |
| "branch-loads\0" |
| "branch-mispredicts\0" |
| "branch-misses\0" |
| "branch-predicts\0" |
| "branch-read-misses\0" |
| "branch-reads\0" |
| "branches\0" |
| "bus-cycles\0" |
| "cache-misses\0" |
| "cache-references\0" |
| "context-switches\0" |
| "cpu-clock\0" |
| "cpu-cycles\0" |
| "cpu-migrations\0" |
| "cs\0" |
| "cycles\0" |
| "emulation-faults\0" |
| "faults\0" |
| "idle-cycles-backend\0" |
| "idle-cycles-frontend\0" |
| "instructions\0" |
| "l1d-cache-load-misses\0" |
| "l1d-cache-loads\0" |
| "l1d-cache-prefetch-misses\0" |
| "l1d-cache-prefetches\0" |
| "l1d-cache-read-misses\0" |
| "l1d-cache-reads\0" |
| "l1d-cache-store-misses\0" |
| "l1d-cache-stores\0" |
| "l1d-cache-write-misses\0" |
| "l1d-cache-writes\0" |
| "l1d-load-misses\0" |
| "l1d-loads\0" |
| "l1d-prefetch-misses\0" |
| "l1d-prefetches\0" |
| "l1d-read-misses\0" |
| "l1d-reads\0" |
| "l1d-store-misses\0" |
| "l1d-stores\0" |
| "l1d-write-misses\0" |
| "l1d-writes\0" |
| "l1i-cache-load-misses\0" |
| "l1i-cache-loads\0" |
| "l1i-cache-prefetch-misses\0" |
| "l1i-cache-prefetches\0" |
| "l1i-cache-read-misses\0" |
| "l1i-cache-reads\0" |
| "l1i-load-misses\0" |
| "l1i-loads\0" |
| "l1i-prefetch-misses\0" |
| "l1i-prefetches\0" |
| "l1i-read-misses\0" |
| "l1i-reads\0" |
| "llc-cache-load-misses\0" |
| "llc-cache-loads\0" |
| "llc-cache-prefetch-misses\0" |
| "llc-cache-prefetches\0" |
| "llc-cache-read-misses\0" |
| "llc-cache-reads\0" |
| "llc-cache-store-misses\0" |
| "llc-cache-stores\0" |
| "llc-cache-write-misses\0" |
| "llc-cache-writes\0" |
| "llc-load-misses\0" |
| "llc-loads\0" |
| "llc-prefetch-misses\0" |
| "llc-prefetches\0" |
| "llc-read-misses\0" |
| "llc-reads\0" |
| "llc-store-misses\0" |
| "llc-stores\0" |
| "llc-write-misses\0" |
| "llc-writes\0" |
| "major-faults\0" |
| "migrations\0" |
| "minor-faults\0" |
| "page-faults\0" |
| "ref-cycles\0" |
| "stalled-cycles-backend\0" |
| "stalled-cycles-frontend\0" |
| "task-clock\0" |
| "\0"; |
| |
| static const Events eventlist[] = { |
| { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults }, |
| { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions }, |
| { 37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses }, |
| { 56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions }, |
| { 69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses }, |
| { 88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses }, |
| { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions }, |
| { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses }, |
| { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions }, |
| { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions }, |
| { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles }, |
| { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses }, |
| { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences }, |
| { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches }, |
| { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeMilliseconds }, |
| { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles }, |
| { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations }, |
| { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches }, |
| { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles }, |
| { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults }, |
| { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults }, |
| { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles }, |
| { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles }, |
| { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions }, |
| { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, |
| { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, |
| { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches }, |
| { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches }, |
| { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, |
| { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, |
| { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, |
| { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, |
| { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, |
| { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, |
| { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, |
| { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, |
| { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches }, |
| { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches }, |
| { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, |
| { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, |
| { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, |
| { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, |
| { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, |
| { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, |
| { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, |
| { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, |
| { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches }, |
| { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches }, |
| { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, |
| { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, |
| { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, |
| { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, |
| { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches }, |
| { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches }, |
| { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, |
| { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, |
| { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, |
| { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, |
| { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches }, |
| { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches }, |
| { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, |
| { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, |
| { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, |
| { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, |
| { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, |
| { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, |
| { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, |
| { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, |
| { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches }, |
| { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches }, |
| { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, |
| { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, |
| { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, |
| { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, |
| { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, |
| { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, |
| { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults }, |
| { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations }, |
| { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults }, |
| { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults }, |
| { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, QTest::RefCPUCycles }, |
| { 1303, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles }, |
| { 1326, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles }, |
| { 1350, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeMilliseconds }, |
| { 0, PERF_TYPE_MAX, 0, QTest::Events } |
| }; |
| /* -- END GENERATED CODE -- */ |
| |
| QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id) |
| { |
| const Events *ptr = eventlist; |
| for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
| if (ptr->type == type && ptr->event_id == event_id) |
| return ptr->metric; |
| } |
| return QTest::Events; |
| } |
| |
| void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) |
| { |
| initPerf(); |
| const char *colon = strchr(name, ':'); |
| int n = colon ? colon - name : strlen(name); |
| const Events *ptr = eventlist; |
| for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
| int c = strncmp(name, eventlist_strings + ptr->offset, n); |
| if (c == 0) |
| break; |
| if (c < 0) { |
| fprintf(stderr, "ERROR: Performance counter type '%s' is unknown\n", name); |
| exit(1); |
| } |
| } |
| |
| attr.type = ptr->type; |
| attr.config = ptr->event_id; |
| |
| // now parse the attributes |
| if (!colon) |
| return; |
| while (*++colon) { |
| switch (*colon) { |
| case 'u': |
| attr.exclude_user = true; |
| break; |
| case 'k': |
| attr.exclude_kernel = true; |
| break; |
| case 'h': |
| attr.exclude_hv = true; |
| break; |
| case 'G': |
| attr.exclude_guest = true; |
| break; |
| case 'H': |
| attr.exclude_host = true; |
| break; |
| default: |
| fprintf(stderr, "ERROR: Unknown attribute '%c'\n", *colon); |
| exit(1); |
| } |
| } |
| } |
| |
| void QBenchmarkPerfEventsMeasurer::listCounters() |
| { |
| if (!isAvailable()) { |
| printf("Performance counters are not available on this system\n"); |
| return; |
| } |
| |
| printf("The following performance counters are available:\n"); |
| const Events *ptr = eventlist; |
| for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { |
| printf(" %-30s [%s]\n", eventlist_strings + ptr->offset, |
| ptr->type == PERF_TYPE_HARDWARE ? "hardware" : |
| ptr->type == PERF_TYPE_SOFTWARE ? "software" : |
| ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other"); |
| } |
| |
| printf("\nAttributes can be specified by adding a colon and the following:\n" |
| " u - exclude measuring in the userspace\n" |
| " k - exclude measuring in kernel mode\n" |
| " h - exclude measuring in the hypervisor\n" |
| " G - exclude measuring when running virtualized (guest VM)\n" |
| " H - exclude measuring when running non-virtualized (host system)\n" |
| "Attributes can be combined, for example: -perfcounter branch-mispredicts:kh\n"); |
| } |
| |
| QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default; |
| |
| QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() |
| { |
| qt_safe_close(fd); |
| } |
| |
| void QBenchmarkPerfEventsMeasurer::init() |
| { |
| } |
| |
| void QBenchmarkPerfEventsMeasurer::start() |
| { |
| |
| initPerf(); |
| if (fd == -1) { |
| // pid == 0 -> attach to the current process |
| // cpu == -1 -> monitor on all CPUs |
| // group_fd == -1 -> this is the group leader |
| // flags == 0 -> reserved, must be zero |
| fd = perf_event_open(&attr, 0, -1, -1, 0); |
| if (fd == -1) { |
| perror("QBenchmarkPerfEventsMeasurer::start: perf_event_open"); |
| exit(1); |
| } else { |
| ::fcntl(fd, F_SETFD, FD_CLOEXEC); |
| } |
| } |
| |
| // enable the counter |
| ::ioctl(fd, PERF_EVENT_IOC_RESET); |
| ::ioctl(fd, PERF_EVENT_IOC_ENABLE); |
| } |
| |
| qint64 QBenchmarkPerfEventsMeasurer::checkpoint() |
| { |
| ::ioctl(fd, PERF_EVENT_IOC_DISABLE); |
| qint64 value = readValue(); |
| ::ioctl(fd, PERF_EVENT_IOC_ENABLE); |
| return value; |
| } |
| |
| qint64 QBenchmarkPerfEventsMeasurer::stop() |
| { |
| // disable the counter |
| ::ioctl(fd, PERF_EVENT_IOC_DISABLE); |
| return readValue(); |
| } |
| |
| bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64) |
| { |
| return true; |
| } |
| |
| int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int) |
| { |
| return 1; |
| } |
| |
| int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int) |
| { |
| return 1; |
| } |
| |
| QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType() |
| { |
| return metricForEvent(attr.type, attr.config); |
| } |
| |
| static quint64 rawReadValue(int fd) |
| { |
| /* from the kernel docs: |
| * struct read_format { |
| * { u64 value; |
| * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED |
| * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING |
| * { u64 id; } && PERF_FORMAT_ID |
| * } && !PERF_FORMAT_GROUP |
| */ |
| |
| struct read_format { |
| quint64 value; |
| quint64 time_enabled; |
| quint64 time_running; |
| } results; |
| |
| size_t nread = 0; |
| while (nread < sizeof results) { |
| char *ptr = reinterpret_cast<char *>(&results); |
| qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread); |
| if (r == -1) { |
| perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results"); |
| exit(1); |
| } |
| nread += quint64(r); |
| } |
| |
| if (results.time_running == results.time_enabled) |
| return results.value; |
| |
| // scale the results, though this shouldn't happen! |
| return results.value * (double(results.time_running) / double(results.time_enabled)); |
| } |
| |
| qint64 QBenchmarkPerfEventsMeasurer::readValue() |
| { |
| quint64 raw = rawReadValue(fd); |
| if (metricType() == QTest::WalltimeMilliseconds) { |
| // perf returns nanoseconds |
| return raw / 1000000; |
| } |
| return raw; |
| } |
| |
| QT_END_NAMESPACE |
| |
| #endif |