微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

创建用于迭代摘要的自定义函数

如何解决创建用于迭代摘要的自定义函数

我创建了一种算法来获取子集列表的摘要

这是我的数据框:

df <- data.frame(
  Name = c("asdf","kjhgf","cvbnm","rtyui","jhfd","sdfghj","dfghj","cvbnm"),sale = c(27,NA,27,16,14,18,28),city = c("CA","TX","MN","NY","MT","HU","KL","SA","TX"),Dept = c("HH","MM","NN","AA","VV","JJ","ZZ")
)

现在,我根据需要创建了一些子集:

df1<- df
df$cc1<-1
#Astellas
df2<- subset(df,Dept == 'MM')
df$cc2<-ifelse(df$Dept == 'MM',1,NA)
lst<-list(df$cc1,df$cc2)
ldat<-list("ALL" = df1,"MM" =df2)

使用以下算法,我能够获得所需的摘要输出,但是我想创建一个执行相同功能函数。请帮助找出如何创建自定义函数,以得到与以下脚本相同的结果。

df %>%
  select(-Name,-city) %>%
  group_by(Dept) -> dat

  N <- length(dat[[1]])
  Median <- median(dat[[1]])
  Average <- mean(dat[[1]])
  q25 <- quantile(dat[[1]])[2]
  q75 <- quantile(dat[[1]])[4]
  cbind(q25,Median,Average,q75,N) -> ALL
  dat %>% filter(Dept == "MM") -> MM
  N <- length(MM[[1]])
  Median <- median(MM[[1]])
  Average <- mean(MM[[1]])
  q25 <- quantile(MM[[1]])[2]
  q75 <- quantile(MM[[1]])[4]
  cbind(q25,N) -> MM
  as.data.frame(rbind(ALL,MM)) %>%
  `rownames<-`(.,c("ALL","MM")) %>%
    pander::pander() %>% as.data.frame()

我需要将上面的脚本转换成这样的函数

functiont(data=ldat,var = "sale",name_of_var = c("ALL","MM"))

对于变量列表,该函数应该是动态的。在这种情况下,我们有两个变量c("ALL","MM")。所以这应该是动态的。

输出摘要应为flextable,如下所示:

enter image description here

解决方法

检查代码后,便可以使用所需功能。本解决方案中的函数采用所需的参数,但我进行了一些修改以仅使用一个数据帧,这是主要的df。原因是ALL的汇总将始终来自第一个大数据框,因此不需要为全局数据框创建列表,然后为过滤器创建其他对象。该函数产生所需的内容,并且可以接受您提到的任何过滤器。下面是该函数的代码和一些应用程序:

library(dplyr)
#Function
myfun <- function(data,var,name_of_var)
{
  #Data
  dat <- data
  #Compute for all items
  N <- length(dat[[var]])
  Median <- median(dat[[var]])
  Average <- mean(dat[[var]])
  q25 <- quantile(dat[[var]])[2]
  q75 <- quantile(dat[[var]])[4]
  as.data.frame(cbind(q25,Median,Average,q75,N)) -> ALL
  rownames(ALL) <- 'All'
  #Now the filter values
  dat %>% filter(Dept %in% name_of_var) -> MM
  #Create list
  List <- split(MM,MM$Dept)
  #Apply summaries in a function
  compute <- function(x)
  {
    N <- length(x[[var]])
    Median <- median(x[[var]])
    Average <- mean(x[[var]])
    q25 <- quantile(x[[var]])[2]
    q75 <- quantile(x[[var]])[4]
    as.data.frame(cbind(q25,N)) -> y
    rownames(y) <- unique(x$Dept)
    return(y)
  }
  #Apply
  List2 <- do.call(rbind,lapply(List,compute))
  #Bind all
  Binded <- rbind(ALL,List2)
  return(Binded)
}

现在,一些应用程序:

#Apply function 1
Ex1 <- myfun(data=df,var = "sale",name_of_var = c("MM"))

输出:

     q25 Median  Average  q75  N
All 15.0     19 20.90909 27.0 11
MM  15.5     17 19.00000 20.5  4

示例2:

#Apply function 2
Ex2 <- myfun(data=df,name_of_var = c("MM","HH"))

输出:

     q25 Median  Average  q75  N
All 15.0     19 20.90909 27.0 11
HH  27.0     27 27.00000 27.0  1
MM  15.5     17 19.00000 20.5  4

之后,您可以按任何样式设置输出的格式。

,
df <- data.frame(Name = c("asdf","kjhgf","cvbnm","rtyui","jhfd","sdfghj","dfghj","cvbnm"),sale=c(27,28,27,16,14,25,19,18,28),city=c("CA","TX","MN","NY","MT","HU","KL","SA","TX"),Dept = c("HH","MM","NN","AA","VV","JJ","ZZ"))


df1<- df
df$cc1<-1
df2<- subset(df,Dept == 'MM')
df$cc2<-ifelse(df$Dept == 'MM',1,NA)
lst<-list(df$cc1,df$cc2)
listd<-list("ALL" = df1,"MM" =df2)

#I want to run my function for listd so that i can get a  combined summary for all variables in listd
tt2<-function(data,footer,Name_of_variable,decimal){
  for (d in 1:length(data)) {
    cat('\n\n#### ',names(data)[d],'\n\n')
    md<-data[[d]]
    table_list<-list()
    for (i in 1:length(d))
      table_list[[i]]<-t1(md,decimal,Name_of_variable)
    tt<- do.call(rbind,table_list)
  } 
  cat(knit_print(tt))
  cat('\n\n')
}
t1<-function(dataset,Suff,Name_of_variable){
  numdig <- if (decimal == TRUE) {1} else {0}
  var <- rlang::parse_expr(var) 
summ_tab1<- dataset %>% filter(!is.na(!!var)) %>%   summarise(
  q25 = format(round(quantile(!! var,type=6,probs = seq(0,0.25),na.rm=TRUE)[2],digits = numdig),nsmall = numdig),Median = format(round(quantile(!! var,na.rm=TRUE)[3],Average = format(round( mean(!! var,na.rm=TRUE),q75 = format(round(quantile(!! var,na.rm=TRUE)[4],N = sum(!is.na(!!var)))
summ_tab<-summ_tab1 %>%  
  mutate(" "=!!Name_of_variable,q25 = q25,Median =Median,Average =Average,q75 = q75)%>%
  dplyr::rename(
    `25th percentile` = q25,`75th percentile` = q75)%>%select(" ",N,everything())
summ_tab1
}


tt2(data = listd,Name_of_variable = "listd",decimal = TRUE)

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。