微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

我不能在 PERF_COUNT_HW_CACHE_LL 类型中使用“perf_event_open()”

如何解决我不能在 PERF_COUNT_HW_CACHE_LL 类型中使用“perf_event_open()”

按照 perf_event_open() man page 的建议,我使用 libpfm4 (man page) 创建其 perf_event_attr 属性

// Argument block passed to libpfm4, and the perf attribute structure it refers to.
pfm_perf_encode_arg_t arg;
struct perf_event_attr pea;
(...)
// Point the argument block at pea so the call below works on that structure.
arg.attr = &pea;
// Ask libpfm4 to encode the named event; ret holds the library status code
// (compared against PFM_SUCCESS in the full program further down).
ret = pfm_get_os_event_encoding(
                  "PERF_COUNT_HW_CACHE_LL:READ:ACCESS:u",PFM_PLM3,PFM_OS_PERF_EVENT_EXT,&arg);

打印 arg 和 pea 中的数据,我得到:

  arg.fstr : perf::PERF_COUNT_HW_CACHE_LL:READ:ACCESS:u=1:k=0:h=0:precise=0:excl=0:mg=0:mh=1
  pea.type : PERF_TYPE_HW_CACHE
pea.config : 0x2

其中 pea.config 与 perf_event_open() man page 中的公式一致。

config = (perf_hw_cache_id) |
         (perf_hw_cache_op_id << 8) |
         (perf_hw_cache_op_result_id << 16);

检查 pea 的位标志,似乎也一切正常。但是当我用 pea 调用 perf_event_open() 时,得到的文件描述符是 -1:

int fd;
// perf_event_open takes five arguments: attr, pid, cpu, group_fd, flags.
// pid = 0 and cpu = -1 measure the calling thread on any CPU;
// group_fd = -1 makes this event its own group leader; flags = 0.
fd = perf_event_open(&pea, 0, -1, -1, 0);
if (fd == -1){
    // config is 64 bits wide; cast so the conversion specifier matches the argument.
    fprintf(stderr,"Error: Couldn't open leader %llXh\n",(unsigned long long)pea.config);
    exit(EXIT_FAILURE);
}

我使用的是 AMD Zen2 处理器。

我尝试过的其他方法

A) 如果我将 pea.type 更改为 PERF_TYPE_RAW,并在 pea.config 中使用 AMD 提供的原始值:

L3 Accesses              : L3Event[0xFF0F00000040FF04]
L3 Miss (includes Chg2X) : L3Event[0xFF0F000000400104]

我可以打开并读取值。然而,它们小得令人难以置信(访问或未命中少于 20)。我想我在这里读到的东西完全错误

B) 相比之下,如果我继续使用 libpfm4 来测量通用(generic)的 CACHE-MISSES 和 CACHE-REFERENCES(根据 perf_event_open() man page,它们通常映射到 LLC),我得到 10^10 数量级的计数。我不喜欢这个解决方案,因为我不知道我实际测量的是什么。

我弄乱的代码

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>
#define USE_LIBPFM4 1

/**
 * Print a number @in in its binary form, most significant bit first.
 *
 * Set bits are shown as '1' and clear bits as '_'. Bits are grouped in
 * nibbles separated by one space, and every 16 bits by three spaces.
 *
 * @param open   text written before the bit pattern
 * @param in     64-bit value to display
 * @param close  text written after the bit pattern
 */
void printb(const char *open, uint64_t in, const char *close){
    enum { STR_SIZE = 64 + 3*3 + 4*3 + 1 }; //bits + spaces + \0
    char str[STR_SIZE];
    int i, j;

    memset(str, 0, sizeof str);

    // Build the pattern least significant bit first; it is emitted reversed below.
    for(i = j = 0; i < 64; i++){
        str[j++] = ((in >> i) & 1) ? '1' : '_';
        if((i+1)%4 == 0 && i+1 < 64){
            str[j++] = ' ';
            if((i+1)%16 == 0){ // wider gap between 16-bit groups
                str[j++] = ' ';
                str[j++] = ' ';
            }
        }
    }

    printf("%s", open);
    // Start at the last character written, not at the terminator, so no
    // NUL byte ends up in the output.
    while(j > 0) putchar(str[--j]);
    printf("%s", close);
}

int main(int argc,char **argv) {
    /**
     ** INITIALIZE
     **/
    int ret;
    ret = pfm_initialize();
    if (ret != PFM_SUCCESS)
        errx(1,"cannot initialize library %s",pfm_strerror(ret));

    /**
     ** SETUP perf_event_attr THROUGH libpfm4 OR MANUALLY
     **/
    pfm_perf_encode_arg_t arg;
    struct perf_event_attr pea;
    char full_name[512],*fn = full_name;
    memset(&arg,sizeof(arg));
    memset(&pea,sizeof(pea));
    memset(fn,512);
    arg.attr = &pea;
    arg.fstr = &fn;
    arg.size = sizeof(pfm_perf_encode_arg_t);
    pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    /**
     ** Some available counter names:
     ** "RETIRED_INSTRUCTIONS"
     ** "CACHE-MISSES" : generic of perf. It "usually"
     **                  refers to LL cache. It counts a lot
     ** "PERF_COUNT_HW_CACHE_LL:READ:MISS:u" : The thing I want to read.
     **                                        It does not work :(
     **/
    if(USE_LIBPFM4){
        ret = pfm_get_os_event_encoding("PERF_COUNT_HW_CACHE_LL:READ:MISS:u",&arg);
        if (ret != PFM_SUCCESS)
            errx(1,"cannot get encoding %s",pfm_strerror(ret));
    }else{
        pea.type = PERF_TYPE_RAW;
        strncpy(*arg.fstr,"RAW_MISS",8); //made up name
        pea.config = 0xFF0F000000400104; //from AMD's page
        pea.exclude_kernel = 1;
        pea.exclude_hv = 1;
        pea.exclude_guest = 1;
    }

    /**
     ** PEEKING INTO @arg AND @pea
     **/
    char *type[] = {"PERF_TYPE_HARDWARE","PERF_TYPE_SOFTWARE","PERF_TYPE_TRACEPOINT","PERF_TYPE_HW_CACHE","PERF_TYPE_RAW","PERF_TYPE_BREAKPOINT"};
    uint64_t *flags = (uint64_t*) ((char*)&pea + 2*sizeof(uint32_t)
                                   + 4*sizeof(uint64_t));
    printf("  arg.fstr : %s\n",*arg.fstr);
    printf("  pea.type : %s\n",type[pea.type]);
    printb("pea.config : ",pea.config,"\n");
    printb(" pea.flags : ",*flags,"\n");
    printf("             " //flag names' mnemonics
           "[           ---reserved---            ]tc   "
           "abkn wcuc mukg hsmp   pwte ifcm ihku epid\n");

    /**
     ** OPEN PERF EVENT
     **/
    int fd; //event file descriptor
    fd = perf_event_open(&pea,0);
    if (fd == -1){
        fprintf(stderr,pea.config);
        exit(EXIT_FAILURE);
    }
    
    /**
     ** MEASURE A JUMPY FOR LOOP
     **/
    int *p = (int*) malloc(800000*sizeof(int));
    if(!p){
        fprintf(stderr,"Error: Malloc is angry\n");
        exit(-1);
    }
    ioctl(fd,PERF_EVENT_IOC_RESET,0); // ready... set...
    ioctl(fd,PERF_EVENT_IOC_ENABLE,0); // go!
    size_t n,o;
    for(size_t i=0; i < 10000000000; i++){
        n = (i*347697+997)%800000;
        p[n] = n;
    }
    ioctl(fd,PERF_EVENT_IOC_disABLE,0); // stop

    /**
     ** READ COUNT OUT
     **/
    struct read_format {
        uint64_t nr;
        struct {
            uint64_t val;
            uint64_t id;
        } rec[64];
    } rf;
    read(fd,&rf,sizeof(rf));
    for(int i=0; i < rf.nr; i++)
        fprintf(stderr,"%2d) ID(%ld): %ld\n",i,rf.rec[i].id,rf.rec[i].val);
    
    return 0;
}

如何衡量实际的 PERF_COUNT_HW_CACHE_LL:READ:MISS:u 事件?

感谢您的时间和帮助。

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。