如何解决R:合并具有相同数据的列的函数
在我之前的帖子中(见上面的链接),我想知道如何组合具有相同数据的列并更改列名以反映范围。我从一个产生的函数开始
并且接受的答案产生了我想要的输出
我应用了同样的功能
library(dplyr)
library(flextable)
library(stringr)
library(tidyverse)
Shop_fcn <- function(data){
data %>%
group_by(Day) %>%
mutate(N_nam = n_distinct(Names)) %>%
group_by(Names,Day,N_nam,Store,Item) %>%
summarize(n_item = n()) %>%
group_by(Day,Item) %>%
summarize(n_nam = n(),n_item = sum(n_item))%>%
mutate(pct = round(n_nam/N_nam*100,digits = 1),txt = paste0( n_nam," (",pct,"%)"),Day_n = (paste0("Day "," (N=",")")))%>%
ungroup %>%
select(Day_n,Item,txt) %>%
group_by(Store,txt) %>%
summarise(Day_n = if(n() > 1)
sprintf('Day %s %s',paste(range(readr::parse_number(unique(Day_n))),collapse=' - '),str_remove(first(Day_n),'^[^(]+')) else Day_n) %>%
pivot_wider(values_from = txt,names_from = Day_n) %>%
mutate_at(vars(starts_with(c("Day"))),~if_else(is.na(.),"",.)) %>%
arrange(Store,Item) %>%
group_by(store2 = Store) %>%
mutate(Store = if_else(row_number() != 1,Store))%>%
ungroup() %>%
select(Store,str_sort(names(.)[-(1:2)],numeric = TRUE),-store2)
}
到更大的数据集
Names <- as.character(c('Adam','Morticia','Mickey','Minnie','Lucy','Adam','Gomez','Olive','Ricky','Eve','Olive'))
Day <- as.numeric(c(1,1,2,3,6,8,9,10,11,12,13,14,5,4,7,1))
Store <- as.character(c('None','None','ACE','Amazon','Best Buy','CVS','Hobby Lobby','Home Depot','Ikea','Lowes','Petco','Target','Walgreens','Walmart','Walgreens'))
Item <- as.character(c('None','Hammer','Nails','Plywood','Bricks','Frame','Batteries','TV','Advil','Brush','Paint','Level','Wrench','Pillow','Blanket','Lamp','Vase','Table','Chair','Screwdriver','Plunger','Cat food','Cat litter','Goldfish','Dog food','Dog treat','Hamster','Rug','Vacuum','gloves','Tylenol','Napkins','Benadryl','Soap','Shampoo','Conditioner','Lotion','Foil','Foil'))
Shop_list <- as.data.frame(cbind(Names,Item),stringsAsFactors=FALSE)
Shop_day<- Shop_list %>%
bind_rows() %>%
Shop_fcn ()
flextable(Shop_day)
并得到以下内容
第 1-14 天和第 3-5 天不应合并
Shop_fcn <- function(data){
data %>%
group_by(Day) %>%
mutate(N_nam = n_distinct(Names)) %>%
group_by(Names,")")))%>%
ungroup %>% select(Day_n,txt) %>%
pivot_wider(values_from = txt,Store))%>%
ungroup() %>% select(-store2)
}
Shop_day<- Shop_list %>%
bind_rows() %>%
Shop_fcn ()
flextable(Shop_day)
但是,我现在遇到了相同的问题,即合并相同的日期(特别是第 8-13 天的列)和未订购 1-14 天的新问题。
我不确定最好的解决方案是修改函数,还是将新函数应用到 flextable 以组合列和相应的列名。
我尝试删除重复的列,但仍然无法想出如何保留重复列的名称显示为范围或如何以正确的顺序获取列的解决方案。
Shop_nodup <- Shop_day[!duplicated(as.list(Shop_day))]
flextable(Shop_nodup)
解决方法
-
列名不按顺序的原因是因为
Day
列是字符类型而不是数字类型。将其转换为数字类将使它们按所需顺序排列。数字变成字符是因为在您的数据生成代码中您使用了as.data.frame(cbind(....))
,其中cbind
将数据转换为矩阵,并且由于矩阵只能包含类型的数据,因此它会将数字转换为字符。相反,您应该使用data.frame(....)
来保持类的类型不变。 -
为了将日期列与相似的值组合在一起,我在根据每天的值创建唯一键后使用
rleid
。
您可以使用的功能是-
library(tidyverse)
library(data.table)
library(flextable)
Shop_fcn <- function(data){
Shop_list %>%
group_by(Day = as.numeric(Day)) %>%
mutate(N_nam = n_distinct(Names)) %>%
group_by(Names,Day,N_nam,Store,Item) %>%
summarize(n_item = n()) %>%
group_by(Day,Item) %>%
summarize(n_nam = n(),n_item = sum(n_item)) %>%
ungroup -> tmp
tmp %>%
group_by(Day) %>%
summarise(txt = paste(n_nam,n_item,Item,sep = '-',collapse = ',')) %>%
mutate(grp = rleid(txt)) %>%
select(-txt) %>%
left_join(tmp,by = 'Day') %>%
group_by(grp) %>%
mutate(pct = round(n_nam/N_nam*100,digits = 1),txt = paste0( n_nam," (",pct,"%)"),Day_n = if(n_distinct(Day) > 1) sprintf('Day %s - %s (N = %s)',first(Day),last(Day),N_nam) else sprintf('Day %s (N=%s)',N_nam)) %>%
ungroup %>%
select(Day_n,txt) %>%
pivot_wider(values_from = txt,names_from = Day_n,values_fn = first,values_fill = '') %>%
arrange(Store,Item) %>%
group_by(Store) %>%
mutate(Store = if_else(row_number() != 1,"",Store)) %>%
ungroup()
}
对于您上一篇文章中的数据,此返回 -
Shop_day<- Shop_list %>% Shop_fcn
flextable(Shop_day)
对于这篇文章中的数据,它返回 -
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。