如何解决更新自定义函数以按组获取组合的摘要统计信息
我创建了一个函数,用于获取平均值,百分位数的摘要。但我不想要特定子集的摘要。所以我相应地创建了子集。
但是我的功能无法正常工作。
实际上,我实际上是在尝试更新函数,以便获取变量列表的摘要,因为变量名称可以对多个变量列表进行摘要。
我不知道如何在函数中将“ ALL”,“ MM”作为变量名。 这样两者的摘要都可以自己填写
df <- data.frame(Name = c("asdf","kjhgf","cvbnm","rtyui","jhfd","sdfghj","dfghj","cvbnm"),sale=c(27,28,27,16,14,25,19,18,28),city=c("CA","TX","MN","NY","MT","HU","KL","SA","TX"),Dept = c("HH","MM","NN","AA","VV","JJ","ZZ"))
df1<- df
df$cc1<-1
df2<- subset(df,Dept == 'MM')
df$cc2<-ifelse(df$Dept == 'MM',1,NA)
lst<-list(df$cc1,df$cc2)
listd<-list("ALL" = df1,"MM" =df2)
#I want to run my function for listd so that i can get a combined summary for all variables in listd
tt2<-function(data,var,footer,Name_of_variable,decimal){
for (d in 1:length(data)) {
cat('\n\n#### ',names(data)[d],'\n\n')
md<-data[[d]]
table_list<-list()
for (i in 1:length(d))
table_list[[i]]<-t1(md,decimal,Name_of_variable)
tt<- do.call(rbind,table_list)
}
cat(knit_print(tt))
cat('\n\n')
}
t1<-function(dataset,Suff,Name_of_variable){
numdig <- if (decimal == TRUE) {1} else {0}
var <- rlang::parse_expr(var)
summ_tab1<- dataset %>% filter(!is.na(!!var)) %>% summarise(
q25 = format(round(quantile(!! var,type=6,probs = seq(0,0.25),na.rm=TRUE)[2],digits = numdig),nsmall = numdig),Median = format(round(quantile(!! var,na.rm=TRUE)[3],Average = format(round( mean(!! var,na.rm=TRUE),q75 = format(round(quantile(!! var,na.rm=TRUE)[4],N = sum(!is.na(!!var)))
summ_tab<-summ_tab1 %>%
mutate(" "=!!Name_of_variable,q25 = q25,Median =Median,Average =Average,q75 = q75)%>%
dplyr::rename(
`25th percentile` = q25,`75th percentile` = q75)%>%select(" ",N,everything())
summ_tab1
}
tt2(data = listd,var = "sale",Name_of_variable = "listd",decimal = TRUE)
以前我得到的摘要如下
解决方法
我已经稍微重写了您的t1
函数,并利用了它返回一个数据帧的事实。可以与purrr::map_dfr
一起使用:
library(dplyr)
df <- data.frame(Name = c("asdf","kjhgf","cvbnm","rtyui","jhfd","sdfghj","dfghj","cvbnm"),sale=c(27,28,27,16,14,25,19,18,28),city=c("CA","TX","MN","NY","MT","HU","KL","SA","TX"),Dept = c("HH","MM","NN","AA","VV","JJ","ZZ"))
df1<- df
df$cc1<-1
df2<- subset(df,Dept == 'MM')
df$cc2<-ifelse(df$Dept == 'MM',1,NA)
lst<-list(df$cc1,df$cc2)
listd<-list("ALL" = df1,"MM" =df2)
t1 <- function(dataset,var,decimal){
numdig <- if (decimal == TRUE) {
1
} else {
0
}
var <- rlang::parse_expr(var)
dataset %>%
filter(!is.na(!!var)) %>%
summarise(
q25 = format(round(quantile(!!var,type = 6,probs = seq(0,0.25),na.rm=TRUE)[2],digits = numdig),nsmall = numdig),Median = format(round(quantile(!!var,na.rm=TRUE)[3],Average = format(round(mean(!!var,na.rm = TRUE),q75 = format(round(quantile(!!var,na.rm = TRUE)[4],N = sum(!is.na(!!var))) %>%
rename(
`25th percentile` = q25,`75th percentile` = q75)
}
listd %>%
purrr::map_dfr(~t1(dataset = .x,var = "sale",decimal = TRUE),.id = " ")
#> 25th percentile Median Average 75th percentile N
#> 1 ALL 14.0 19.0 20.9 27.0 11
#> 2 MM 14.5 17.0 19.0 25.5 4
由reprex package(v0.3.0)于2020-09-23创建
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。