如何解决将长数据表重塑为宽数据表列表
我的问题是这里提出的问题的扩展 How to reshape data from long to wide format 所以我会用类似的方式来表达。
不同之处在于,我想将一个长数据表重塑为一个由多个宽数据表组成的 list。
# Example long-format data: 2 simulations x 4 time points, with two value
# columns filled with random draws (requires the data.table package).
dat <- data.table(
  sim = rep(c(1, 2), each = 4),
  time = rep(1:4, times = 2),
  value1 = rnorm(8),
  value2 = rnorm(8)
)
dat
sim time value1 value2
1 1 1 0.3407 0.5167
2 1 2 -0.7033 0.8416
3 1 3 -0.3795 -0.4717
4 1 4 -0.7460 0.8479
5 2 1 0.8981 -0.7163
6 2 2 -0.3347 -0.6849
7 2 3 0.5013 0.8941
8 2 4 -0.1745 0.0795
我想重塑它,得到一个由宽数据表组成的 list,其中各元素分别命名为 value1、value2 …… value99 等:
l <- list()
l[["value1"]]
sim 1 2 3 4
1 1 0.3407 -0.7033 -0.3795 -0.7460
5 2 0.8981 -0.3347 0.5013 -0.1745
l[["value2"]]
sim 1 2 3 4
1 1 0.5167 0.8416 -0.4717 0.8479
5 2 -0.7163 -0.6849 0.8941 0.0795
解决方法
两种变体。
data.table
library(data.table)
# Step 1: stack value1/value2 into long form, keyed by (sim, time).
# `id.vars` is spelled out in full rather than relying on partial matching.
tmp <- melt(as.data.table(dat), id.vars = c("sim", "time"))
# Step 2: one row per (sim, variable), one column per time point.
tmp <- dcast(tmp, sim + variable ~ time, value.var = "value")
# Step 3: one wide table per original value column; keep.by = FALSE drops
# the now-redundant `variable` column from each piece.
tmp <- split(tmp, by = "variable", keep.by = FALSE)
tmp
# $value1
# sim 1 2 3 4
# <num> <num> <num> <num> <num>
# 1: 1 1.0458737762 -0.4845954 0.1891288 0.05100633
# 2: 2 -0.0002406689 1.8093820 -0.8253280 1.14547045
# $value2
# sim 1 2 3 4
# <num> <num> <num> <num> <num>
# 1: 1 0.03157319 -0.8352058 -0.06876365 0.7467717
# 2: 2 -0.42551873 -0.7720822 0.15276411 0.9885968
我经常将 magrittr::%>% 与 data.table 一起使用,因此上面的代码也可以改写为:
library(data.table)
library(magrittr) # if %>% is not already available
# Same reshaping as a single pipeline: melt to long form, cast time points to
# columns, then split into one wide table per original value column.
as.data.table(dat) %>%
  melt(id.vars = c("sim", "time")) %>%
  dcast(sim + variable ~ time, value.var = "value") %>%
  # keep.by = FALSE drops the `variable` column from each piece
  split(by = "variable", keep.by = FALSE)
# $value1
# sim 1 2 3 4
# <num> <num> <num> <num> <num>
# 1: 1 1.0458737762 -0.4845954 0.1891288 0.05100633
# 2: 2 -0.0002406689 1.8093820 -0.8253280 1.14547045
# $value2
# sim 1 2 3 4
# <num> <num> <num> <num> <num>
# 1: 1 0.03157319 -0.8352058 -0.06876365 0.7467717
# 2: 2 -0.42551873 -0.7720822 0.15276411 0.9885968
tidyverse
library(dplyr)
library(tidyr) # pivot_longer,pivot_wider
# Long form first (one row per sim/time/name), then spread the time points
# across columns and split into one tibble per value column.
dat %>%
  pivot_longer(cols = -c(sim, time)) %>%
  pivot_wider(names_from = time, values_from = value) %>%
  split(.$name) %>%
  # Drop the grouping column from every piece
  lapply(function(piece) select(piece, -name))
# $value1
# # A tibble: 2 x 5
# sim `1` `2` `3` `4`
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 1.05 -0.485 0.189 0.0510
# 2 2 -0.000241 1.81 -0.825 1.15
# $value2
# # A tibble: 2 x 5
# sim `1` `2` `3` `4`
# <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0.0316 -0.835 -0.0688 0.747
# 2 2 -0.426 -0.772 0.153 0.989
,
我对这个问题的解决方案是把结果构造成一个嵌套数据帧。下面先简要说明该方法,然后给出一个 reprex。
我会使用 pivot_longer() 和 pivot_wider() 来重塑数据。
首先用 pivot_longer 把数据变成每行只包含一个值,并带有时间(time)、模拟(sim)以及该值属于 value1 还是 value2 的标签。
然后用 pivot_wider 把各个时间点展开成列,这样每一行包含某个模拟在每个时间点上的值,并标明它们属于哪一组值(value1 或 value2)。
最后,我们用 nest 对数据帧进行嵌套,把每组值的全部数据存放在一个数据帧中。
如有必要,可以通过 nested_vals$data 将其作为数据帧列表来访问,其中 nested_vals 是我们用来保存嵌套数据帧的对象。
library(tidyverse)
# Set up example data: 2 simulations x 4 time points, with two value columns
# filled with random draws.
dat <- data.frame(
  sim = rep(c(1, 2), each = 4),
  time = rep(1:4, times = 2),
  value1 = rnorm(8),
  value2 = rnorm(8)
)
# Construct nested data frame: one row per value column, with that column's
# wide (sims as rows, times as columns) table stored in the `data` list-column.
nested_vals <- dat %>%
  # Format dataset in tidy format: one row per (sim, time, name)
  pivot_longer(cols = c(value1, value2)) %>%
  # Move the name of the data to the beginning of the data frame
  relocate(name) %>%
  # Pivot to matrix form as requested (i.e. times as columns, sims as rows)
  pivot_wider(
    id_cols = c(name, sim),
    names_from = time,
    values_from = value
  ) %>%
  # Nest results by name. The argument is named because an unnamed
  # nest(-name) emits an "All elements of `...` must be named" warning.
  nest(data = -name)
nested_vals
#> # A tibble: 2 x 2
#> name data
#> <chr> <list>
#> 1 value1 <tibble[,5] [2 x 5]>
#> 2 value2 <tibble[,5] [2 x 5]>
nested_vals$data[[2]]
#> # A tibble: 2 x 5
#> sim `1` `2` `3` `4`
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.0639 0.250 -1.28 0.850
#> 2 2 -1.90 0.000421 0.704 -0.164
由 reprex package (v2.0.0) 于 2021 年 4 月 7 日创建
另一种方式,使用单个管道语法:
library(tidyverse)
# Single pipeline: stack the value columns, split into one tibble per value
# column, name each piece after its value column, then widen each piece so
# that time points become columns.
dat %>%
  pivot_longer(c(value1, value2)) %>%
  group_split(name) %>%
  # Every row of a piece shares the same `name`; use the first as its label
  setNames(map_chr(., ~ .x$name[1])) %>%
  map(~ pivot_wider(.x, id_cols = sim, names_from = time, values_from = value))
$value1
# A tibble: 2 x 5
sim `1` `2` `3` `4`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 -0.851 -0.0484 -0.656 -0.121
2 2 -0.645 1.59 -0.274 0.445
$value2
# A tibble: 2 x 5
sim `1` `2` `3` `4`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1.46 -1.62 -0.672 1.43
2 2 1.65 0.790 0.495 0.162
,
另一种方法:
library(dplyr)
library(tidyr)
# Spread every value column across time in one go; the resulting columns are
# named <value column>_<time>, e.g. value1_1, ..., value2_4.
wide_dat <- dat %>%
  pivot_wider(
    id_cols = sim,
    names_from = time,
    values_from = starts_with("value")
  )
# Group the widened columns by the prefix before the trailing "_<time>".
# "\\d+$" also handles time labels with more than one digit.
value_cols <- wide_dat[-1]
value_groups <- sub("_\\d+$", "", names(value_cols))
lapply(split.default(value_cols, value_groups), function(x) {
  # Re-attach `sim` and label the remaining columns by their time suffix
  setNames(cbind(wide_dat[1], x), c("sim", sub("^.*_", "", names(x))))
})
$value1
sim 1 2 3 4
1 1 -0.1704969 0.2820143 1.181898 2.2377396
2 2 2.1920534 0.8214070 0.421177 0.7601796
$value2
sim 1 2 3 4
1 1 0.1760887 0.3440053 -0.8435849 0.6729751
2 2 -0.1714095 1.5125986 -0.5739871 -0.9648294
,
tidyverse 解决方案可能是:
library(dplyr)
library(purrr)
library(tidyr)
# Long form: one row per (sim, time, col_name) with the measurement in `values`.
dat_longer <- tidyr::pivot_longer(
  dat,
  starts_with("value"),
  names_to = "col_name",
  values_to = "values"
)
# For each distinct value column, keep its rows and spread the time points
# across columns. The names set up front carry through purrr::map(), giving
# a list named by value column.
list_wide <- unique(dat_longer[["col_name"]]) %>%
  purrr::set_names() %>%
  purrr::map(function(nm) {
    dat_longer %>%
      dplyr::filter(col_name == nm) %>%
      tidyr::pivot_wider(names_from = "time", values_from = "values") %>%
      select(-col_name)
  })
$value1
# A tibble: 2 x 5
sim `1` `2` `3` `4`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 -0.710 -0.334 -0.370 0.777
2 2 0.130 0.877 1.24 -0.202
$value2
# A tibble: 2 x 5
sim `1` `2` `3` `4`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 -0.719 -0.909 0.0821 -0.158
2 2 -0.706 1.51 0.234 1.09
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。