如何解决无法对 PERF_COUNT_HW_CACHE_LL 类型的事件使用 perf_event_open() 的问题?
按照 perf_event_open() man page 的建议,我使用 libpfm4(参见其 man page)来创建 perf_event_attr 属性。
pfm_perf_encode_arg_t arg;
struct perf_event_attr pea;
(...)
arg.attr = &pea;
ret = pfm_get_os_event_encoding(
"PERF_COUNT_HW_CACHE_LL:READ:ACCESS:u",PFM_PLM3,PFM_OS_PERF_EVENT_EXT,&arg);
打印 arg 和 pea 中的数据,我得到:
arg.fstr : perf::PERF_COUNT_HW_CACHE_LL:READ:ACCESS:u=1:k=0:h=0:precise=0:excl=0:mg=0:mh=1
pea.type : PERF_TYPE_HW_CACHE
pea.config : 0x2
其中 pea.config 与 perf_event_open() man page 中的公式一致:
config = (perf_hw_cache_id) |
(perf_hw_cache_op_id << 8) |
(perf_hw_cache_op_result_id << 16);
检查 pea 的位标志,似乎也一切正常。但是当我用 pea 调用 perf_event_open() 时,返回的文件描述符是 -1。
int fd;
fd = perf_event_open(&pea,-1,0);
if (fd == -1){
fprintf(stderr,"Error: Couldn't open leader %lXh\n",pea.config);
exit(EXIT_FAILURE);
}
我使用的是 AMD Zen2 处理器。
我尝试过的其他方法
A) 如果我将 pea.type 更改为 PERF_TYPE_RAW,并把 pea.config 设为 AMD 提供的原始值:
L3 Accesses : L3Event[0xFF0F00000040FF04]
L3 Miss (includes Chg2X) : L3Event[0xFF0F000000400104]
我可以打开并读取值。然而,它们小得令人难以置信(访问或未命中少于 20)。我想我在这里读到的东西完全错误。
B) 相比之下,如果我继续使用 libpfm4 来测量通用(generic)的 CACHE-MISSES 或 CACHE-REFERENCES 事件(根据 perf_event_open() man page,它们通常映射到 LLC),我得到 10^10 数量级的计数。我不喜欢这个解决方案,因为我不知道我实际测量的是什么。
我用来试验的代码(比较乱):
#include <err.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>
#define USE_LIBPFM4 1
/*
 * Print the 64 bits of @in to stdout, most significant bit first,
 * preceded by the string @open and followed by the string @close.
 *
 * A set bit is shown as '1' and a clear bit as '_'. Nibbles are separated
 * by one space; every 16 bits the separator is three spaces instead.
 */
void printb(const char *open,uint64_t in,const char *close){
    /* 64 bit characters + 3 wide separators (3 chars each)
     * + 12 narrow separators (1 char each) + terminating '\0' */
    enum { STR_SIZE = 64 + 3*3 + 12*1 + 1 };
    char str[STR_SIZE];
    size_t len = 0;

    /* Build the text directly in display order (bit 63 first) so that it
     * can be emitted as an ordinary NUL-terminated string. */
    for(int bit = 63; bit >= 0; bit--){
        str[len++] = ((in >> bit) & 1u) ? '1' : '_';
        /* A separator follows every nibble except the last one. */
        if(bit > 0 && bit % 4 == 0){
            int gap = (bit % 16 == 0) ? 3 : 1;
            while(gap-- > 0) str[len++] = ' ';
        }
    }
    str[len] = '\0';

    printf("%s%s%s",open,str,close);
}
int main(int argc,char **argv) {
/**
** INITIALIZE
**/
int ret;
ret = pfm_initialize();
if (ret != PFM_SUCCESS)
errx(1,"cannot initialize library %s",pfm_strerror(ret));
/**
** SETUP perf_event_attr THROUGH libpfm4 OR MANUALLY
**/
pfm_perf_encode_arg_t arg;
struct perf_event_attr pea;
char full_name[512],*fn = full_name;
memset(&arg,sizeof(arg));
memset(&pea,sizeof(pea));
memset(fn,512);
arg.attr = &pea;
arg.fstr = &fn;
arg.size = sizeof(pfm_perf_encode_arg_t);
pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
/**
** Some available counter names:
** "RETIRED_INSTRUCTIONS"
** "CACHE-MISSES" : generic of perf. It "usually"
** refers to LL cache. It counts a lot
** "PERF_COUNT_HW_CACHE_LL:READ:MISS:u" : The thing I want to read.
** It does not work :(
**/
if(USE_LIBPFM4){
ret = pfm_get_os_event_encoding("PERF_COUNT_HW_CACHE_LL:READ:MISS:u",&arg);
if (ret != PFM_SUCCESS)
errx(1,"cannot get encoding %s",pfm_strerror(ret));
}else{
pea.type = PERF_TYPE_RAW;
strncpy(*arg.fstr,"RAW_MISS",8); //made up name
pea.config = 0xFF0F000000400104; //from AMD's page
pea.exclude_kernel = 1;
pea.exclude_hv = 1;
pea.exclude_guest = 1;
}
/**
** PEEKING INTO @arg AND @pea
**/
char *type[] = {"PERF_TYPE_HARDWARE","PERF_TYPE_SOFTWARE","PERF_TYPE_TRACEPOINT","PERF_TYPE_HW_CACHE","PERF_TYPE_RAW","PERF_TYPE_BREAKPOINT"};
uint64_t *flags = (uint64_t*) ((char*)&pea + 2*sizeof(uint32_t)
+ 4*sizeof(uint64_t));
printf(" arg.fstr : %s\n",*arg.fstr);
printf(" pea.type : %s\n",type[pea.type]);
printb("pea.config : ",pea.config,"\n");
printb(" pea.flags : ",*flags,"\n");
printf(" " //flag names' mnemonics
"[ ---reserved--- ]tc "
"abkn wcuc mukg hsmp pwte ifcm ihku epid\n");
/**
** OPEN PERF EVENT
**/
int fd; //event file descriptor
fd = perf_event_open(&pea,0);
if (fd == -1){
fprintf(stderr,pea.config);
exit(EXIT_FAILURE);
}
/**
** MEASURE A JUMPY FOR LOOP
**/
int *p = (int*) malloc(800000*sizeof(int));
if(!p){
fprintf(stderr,"Error: Malloc is angry\n");
exit(-1);
}
ioctl(fd,PERF_EVENT_IOC_RESET,0); // ready... set...
ioctl(fd,PERF_EVENT_IOC_ENABLE,0); // go!
size_t n,o;
for(size_t i=0; i < 10000000000; i++){
n = (i*347697+997)%800000;
p[n] = n;
}
ioctl(fd,PERF_EVENT_IOC_disABLE,0); // stop
/**
** READ COUNT OUT
**/
struct read_format {
uint64_t nr;
struct {
uint64_t val;
uint64_t id;
} rec[64];
} rf;
read(fd,&rf,sizeof(rf));
for(int i=0; i < rf.nr; i++)
fprintf(stderr,"%2d) ID(%ld): %ld\n",i,rf.rec[i].id,rf.rec[i].val);
return 0;
}
如何衡量实际的 PERF_COUNT_HW_CACHE_LL:READ:MISS:u
事件?
感谢您的时间和帮助。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。