微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

按分组合并列:现有实现低效且缓慢

如何解决按分组合并列时低效且缓慢的问题

下面的代码完全可以满足需要,但这些嵌套循环的运行速度非常慢(一次要处理超过10k列),有没有更高效/更简单的方法呢?

# Collect the distinct Xpath, section and attr. values that occur in all.df;
# the loops below iterate over every combination of them.
all_xpath<-unique(dplyr::pull(tibble(all.df['Xpath']),Xpath))
all_section<-unique(dplyr::pull(tibble(all.df['section']),section))
valueS<-unique(dplyr::pull(tibble(all.df['attr.']),attr.))

# Pass 1: concatenate by attr. and Xpath within each section.
# For every (section, Xpath, attr.) combination that matches more than one
# row, join the value. entries with ' | ' and write the joined string back
# into every row of that group, but only if at least one row of the group
# has elem. different from 'telecom'.
# Every iteration filters the whole of all.df again, so the work grows with
# (#sections * #Xpaths * #attr. values) * nrow(all.df).
for (j in all_section){
  for (f in all_xpath){
    for (g in valueS){
      allx <-  data.frame(filter(all.df,section==j & attr. == g & Xpath == f),stringsAsFactors=FALSE)
      if (nrow(allx)>1){
          value<-paste(allx$value.,collapse = ' | ')
          if ( any(all.df[all.df$section==j & all.df$attr.==g & all.df$Xpath==f,]$elem.!='telecom')){
            all.df[all.df$section==j & all.df$attr.==g & all.df$Xpath==f,]$value. <- value
}}}}}

# Pass 2 reuses the name valueS, now holding the distinct elem. values.
valueS<-unique(dplyr::pull(tibble(all.df['elem.']),elem.))
#concat by  elem. and xpath within component
# Only rows with an empty attr. are considered here. Note that this pass reads
# value. as already overwritten by pass 1, so groups that pass 1 has joined
# are joined a second time (their rows already hold the joined string).
for (j in all_section){
  for (f in all_xpath){
    for (g in valueS){
      allx <-  data.frame(filter(all.df,section==j & elem. == g & Xpath == f & attr.==""),stringsAsFactors=FALSE)
      if (nrow(allx)>1){
        value<-paste(allx$value.,collapse = ' | ')
        all.df[all.df$section==j & all.df$elem.==g & all.df$Xpath==f & all.df$attr.=="",]$value. <- value
      }}}}
# After both passes the rows of a joined group are identical; drop duplicates.
all.df<-dplyr::distinct(all.df)

数据的格式如下:

dput(all.df)
# Sample input: 12 rows, i.e. two rows for each
# (section, elem., attr., Xpath) group. Every column has 12 entries so the
# vectors agree with row.names = c(NA, -12L).
structure(list(
  section = c(
    "LastFiled", "LastFiled", "LastFiled", "LastFiled",
    "Results", "Results", "History", "History",
    "Cable", "Cable", "Fan", "Fan"
  ),
  elem. = c(
    "code", "code", "id", "id",
    "code", "code", "effectiveTime", "effectiveTime",
    "value", "value", "code", "code"
  ),
  attr. = c(
    "code", "code", "root", "root",
    "code", "code", "value", "value",
    "", "", "", ""
  ),
  value. = c(
    "8462-4", "8462-5", "39156-7", "39156-8",
    "59408-11", "39156-13", "59408-13", "39156-14",
    "59408-8", "39156-9", "59408-9", "39156-11"
  ),
  Xpath = c(
    "/Document/othersection/entry/body/sceen/code",
    "/Document/othersection/entry/body/sceen/code",
    "/Document/othersection/entry/body/sceen/id",
    "/Document/othersection/entry/body/sceen/id",
    "/Document/othersection/entry/body/sceen/code",
    "/Document/othersection/entry/body/sceen/code",
    "/Document/othersection/entry/sceen/effectiveTime",
    "/Document/othersection/entry/sceen/effectiveTime",
    "/Document/othersection/entry/entryRelationship",
    "/Document/othersection/entry/entryRelationship",
    "/Document/othersection/entry/procedure/entryRelationship/sceen",
    "/Document/othersection/entry/procedure/entryRelationship/sceen"
  )
), row.names = c(NA, -12L), class = "data.frame")

结果应如下所示:

section     elem.       attr.   value.                  Xpath
LastFiled   code        code    8462-4 | 8462-5     /Document/othersection/entry/body/sceen/code
LastFiled   id          root    39156-7 | 39156-8   /Document/othersection/entry/body/sceen/id
Results     code        code    59408-11 | 39156-13 /Document/othersection/entry/body/sceen/code
History     effectiveTime   value   59408-13 | 39156-14     /Document/othersection/entry/sceen/effectiveTime
Cable       value               59408-8 | 39156-9 | 
                                59408-8 | 39156-9   /Document/othersection/entry/entryRelationship
Fan         code                59408-9 | 39156-11 | 
                                59408-9 | 39156-11  /Document/othersection/entry/procedure/entryRelationship

解决方法

可以使用两个 dplyr 管道解决问题,每种数据转换各用一个管道,再用 bind_rows 将两个结果合并。

library(dplyr)

# Build the result from two grouped summaries and stack them.
bind_rows(
  # Transformation 1: for all non-'telecom' rows, collapse value. within each
  # (section, attr., elem., Xpath) group.
  all.df %>%
    filter(elem. != "telecom") %>%
    group_by(section, attr., elem., Xpath) %>%
    summarise(value. = paste(value., collapse = "|"), .groups = "keep"),
  # Transformation 2: for rows with an empty attr., collapse value. within
  # each (section, elem., Xpath) group. attr. is not a grouping column here,
  # so it is absent from this summary and becomes NA after bind_rows().
  all.df %>%
    filter(attr. == "") %>%
    group_by(section, elem., Xpath) %>%
    summarise(value. = paste(value., collapse = "|"), .groups = "keep")
) %>%
  # Restore the empty attr. for the rows that came from transformation 2.
  mutate(attr. = ifelse(is.na(attr.), "", attr.)) %>%
  relocate(value., .before = Xpath)
## A tibble: 8 x 5
## Groups:   section,Xpath [6]
#  section   attr.   elem.         value.            Xpath                                                         
#  <chr>     <chr>   <chr>         <chr>             <chr>                                                         
#1 Cable     ""      value         59408-8|39156-9   /Document/othersection/entry/entryRelationship                
#2 Fan       ""      code          59408-9|39156-11  /Document/othersection/entry/procedure/entryRelationship/sceen
#3 History   "value" effectiveTime 59408-13|39156-14 /Document/othersection/entry/sceen/effectiveTime              
#4 LastFiled "code"  code          8462-4|8462-5     /Document/othersection/entry/body/sceen/code                  
#5 LastFiled "root"  id            39156-7|39156-8   /Document/othersection/entry/body/sceen/id                    
#6 Results   "code"  code          59408-11|39156-13 /Document/othersection/entry/body/sceen/code                  
#7 Cable     ""      value         59408-8|39156-9   /Document/othersection/entry/entryRelationship                
#8 Fan       ""      code          59408-9|39156-11  /Document/othersection/entry/procedure/entryRelationship/sceen

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。