如何在dplyr中使用`bind_rows`处理大小不兼容的列表?

huangapple go评论59阅读模式
英文:

How to use bind_rows for a list with incompatible sizes in dplyr?

问题

我有如下所示的数据(一个由多个数据框组成的列表),希望把每个元素的 `.borders` 列合并成一个长格式的数据框。我目前使用的代码如下:

library(tidyverse)

# Attempted approach from the question (it fails): extract the `.borders`
# vector of every list element and reshape them into one long table.
df %>%
  # regroup the list so that entries can be picked by column name
  purrr::transpose() %>%
  # keep only the `.borders` entry: one vector per feature
  `$`(.borders) %>%
  # the vectors have different lengths (18, 20 and 21), so combining them as
  # columns raises "Tibble columns must have compatible sizes"
  bind_rows() %>%
  pivot_longer(everything(), values_to = "Borders")

但这段代码会报错(数据、期望的结果和完整的错误信息见下方英文原文)。

英文:

I have data like the following:

$ECe
   .type  .value  .borders .feature
1    ale -0.866  0.490      ECe
2    ale -0.811  2.680      ECe
3    ale -0.749  4.990      ECe
4    ale -0.501  7.994      ECe
5    ale -0.472  8.801      ECe
6    ale -0.364 11.140      ECe
7    ale -0.351 14.090      ECe
8    ale -0.172 15.900      ECe
9    ale -0.068 17.810      ECe
10   ale  0.069 18.750      ECe
11   ale  0.132 20.190      ECe
12   ale  0.176 21.020      ECe
13   ale  0.227 22.950      ECe
14   ale  0.295 24.520      ECe
15   ale  0.458 28.290      ECe
16   ale  0.534 31.640      ECe
17   ale  0.265 37.990      ECe
18   ale  0.330 40.700      ECe
19   ale  0.793 45.117      ECe
20   ale  0.393 53.090      ECe
21   ale  0.752 59.719      ECe

$BD
   .type      .value  .borders .feature
1    ale  2.88401670 0.957       BD
2    ale  2.89757908 1.138       BD
3    ale  2.06229728 1.185       BD
4    ale  1.86256683 1.220       BD
5    ale  1.22004089 1.299       BD
6    ale  0.47244692 1.339       BD
7    ale  0.28531498 1.401       BD
8    ale  0.21050138 1.486       BD
9    ale  0.06942624 1.500       BD
10   ale -0.01446751 1.524       BD
11   ale -0.92647637 1.637       BD
12   ale -0.99773176 1.651       BD
13   ale -1.62164363 1.704       BD
14   ale -2.12120182 1.800       BD
15   ale -1.87179280 1.983       BD
16   ale -1.64798056 2.075       BD
17   ale -1.55269957 2.170       BD
18   ale -1.64049346 2.220       BD

I want to create a data frame like

# A tibble: 120 × 2
   name  Borders
   <chr>   <dbl>
 1 ECe     0.491
 2 SOC     0.261
 3 BD      0.958
 4 Sand    4.00 
 5 Silt    0    
 6 Clay    0    
 7 ECe     3.61 
 8 SOC     0.387
 9 BD      1.02 
10 Sand    8.42 
# ℹ 110 more rows

I am using the following code

library(tidyverse)

# Attempted approach from the question (it fails): extract the `.borders`
# vector of every list element and reshape them into one long table.
df %>%
  # regroup the list so that entries can be picked by column name
  purrr::transpose() %>%
  # keep only the `.borders` entry: one vector per feature
  `$`(.borders) %>%
  # the vectors have different lengths (18, 20 and 21), so combining them as
  # columns raises "Tibble columns must have compatible sizes"
  bind_rows() %>% 
  pivot_longer(everything(), values_to = "Borders")

But it returns the following error
>Error in recycle_columns():
! Tibble columns must have compatible sizes.
• Size 18: Columns BD and Clay.
• Size 20: Columns Sand and Silt.
• Size 21: Columns ECe and SOC.
ℹ Only values of size one are recycled.

How can I achieve this in R?
Here is the data in dput format

# Example data: a named list with one data frame per feature. Every data
# frame has the columns `.type` (always "ale"), `.value`, `.borders` and
# `.feature` (the feature name repeated on every row).
df <- local({
  # Build one per-feature data frame with the same layout as `dput()` output.
  ale_frame <- function(feature, value, borders) {
    n <- length(value)
    structure(
      list(
        .type = rep("ale", n),
        .value = value,
        .borders = borders,
        .feature = rep(feature, n)
      ),
      row.names = c(NA, -n),
      class = "data.frame"
    )
  }

  list(
    ECe = ale_frame(
      "ECe",
      value = c(
        -0.866449860261058, -0.811874082709701, -0.749993003099365,
        -0.501212768536108, -0.47268510237864, -0.364538974363897,
        -0.351930853130633, -0.172990179989361, -0.0684651440322241,
        0.0692627161333526, 0.132807737313057, 0.176914094922519,
        0.227473041741352, 0.295586935903242, 0.458768756260148,
        0.534728976733407, 0.265760631807008, 0.330347944699875,
        0.793585231260766, 0.393724931630266, 0.752824545868792
      ),
      borders = c(
        0.490806, 2.68, 4.99, 7.994, 8.801, 11.14, 14.09, 15.9, 17.81,
        18.75, 20.19, 21.02, 22.95, 24.52, 28.29, 31.64, 37.99, 40.7,
        45.1172, 53.09, 59.7192
      )
    ),
    SOC = ale_frame(
      "SOC",
      value = c(
        -1.12668195657973, -1.14070505975704, -1.12096557264514,
        -1.0364427676548, -0.905420835797799, -0.683789433176536,
        -0.53425868958447, -0.483272349572241, -0.360467057150275,
        -0.0571214846376487, 0.0130773857269451, 0.164166766380452,
        0.233741319768938, 0.174784707378661, 0.188264622491949,
        0.150568432447101, 0.532302724503294, 0.733931793684184,
        1.1531117354405, 2.58573209158873, 2.75830864578748
      ),
      borders = c(
        0.260869565217391, 0.330143540669856, 0.422727272727273,
        0.545454545454545, 0.631578947368421, 0.686915887850467,
        0.763636363636364, 0.802631578947368, 0.828571428571429,
        0.889952153110048, 0.967289719626168, 1.02272727272727,
        1.09090909090909, 1.23684210526316, 1.40520446096654,
        1.52727272727273, 1.80861244019139, 1.90909090909091,
        2.20093457943925, 2.49767441860465, 2.66511627906977
      )
    ),
    BD = ale_frame(
      "BD",
      value = c(
        2.88401670431712, 2.89757908472024, 2.06229728227612,
        1.8625668296819, 1.22004089065561, 0.472446924052171,
        0.285314983167609, 0.210501378242261, 0.0694262373804251,
        -0.0144675088601809, -0.926476372785716, -0.997731763217841,
        -1.62164362525624, -2.12120181627754, -1.87179280480865,
        -1.64798056454827, -1.5526995652836, -1.64049345720051
      ),
      borders = c(
        0.957710151765353, 1.13854777070064, 1.18522292993631,
        1.22058598726115, 1.29930014940631, 1.33971974522293,
        1.40121097743178, 1.48638829912715, 1.5, 1.52428025477707,
        1.6371974522293, 1.65183612487222, 1.70405095541401,
        1.80025477707006, 1.98361248722183, 2.07583549579303,
        2.17044900526854, 2.22039789258473
      )
    ),
    Sand = ale_frame(
      "Sand",
      value = c(
        5.72243888922004, 5.04938965197184, 4.7017193997508,
        4.38186986390524, 3.55049136618825, 3.79907192663079,
        3.65192607277815, 2.69221076986002, 2.08417035244566,
        0.508134498143654, -0.760157331236183, -2.22196153094718,
        -2.60183427497522, -2.6090089814882, -4.29829842975557,
        -4.89113681989785, -6.17719615317024, -6.51041335854504,
        -6.70721878970499, -6.69985492661679
      ),
      borders = c(
        3.9999999999992, 7.99999999999983, 12.0000000000005,
        20.0000000000003, 23.9999999999995, 27.9999999999987,
        31.9999999999993, 39.9999999999991, 43.9999999999998,
        48.0000000000004, 52.000000000001, 59.9999999999994,
        63.9999999999986, 64.0000000000001, 68.0000000000007,
        71.9999999999999, 76.0000000000005, 79.9999999999997,
        84.0000000000003, 88.000000000001
      )
    ),
    Silt = ale_frame(
      "Silt",
      value = c(
        -1.81467365524819, -1.80595765781436, -1.44252832794551,
        -1.40678289959526, -1.3809409627197, -1.27631650711468,
        -0.918535212874314, -0.891479949841867, -0.130663280250859,
        0.054226944600215, 0.0371390649359506, 0.547567627523539,
        0.540716330465325, 1.14691203242341, 1.45243650519454,
        1.51824963792443, 1.76292774269082, 1.85725359158924,
        2.48581029669555, 0.388409658807059
      ),
      borders = c(
        0, 3.99999999999921, 7.99999999999841, 7.99999999999983,
        8.00000000000125, 11.999999999999, 15.9999999999997,
        16.0000000000011, 20.0000000000003, 23.9999999999995,
        24.0000000000009, 27.9999999999987, 28.0000000000001,
        31.9999999999993, 35.9999999999985, 40.0000000000006,
        43.9999999999998, 47.999999999999, 64.0000000000015,
        79.9999999999997
      )
    ),
    Clay = ale_frame(
      "Clay",
      value = c(
        -1.02199413478959, -0.826304780697195, -0.942406575344331,
        -0.971009582383038, -1.24981505440423, -1.24219672780758,
        -0.443954661148336, -0.344646636267013, -0.344646636267013,
        0.0190677799370838, 0.509523035852588, 0.805292224788913,
        0.990611614682741, 0.998147256094054, 1.16179870265779,
        2.06234942853985, 2.8900928986798, 3.82397972588229
      ),
      borders = c(
        0, 7.99999999999983, 11.999999999999, 12.0000000000005,
        15.9999999999997, 16.0000000000011, 20.0000000000003,
        23.9999999999995, 24.0000000000009, 28.0000000000001,
        31.9999999999993, 35.9999999999999, 39.9999999999991,
        40.0000000000006, 47.999999999999, 51.9999999999996,
        56.0000000000002, 67.9999999999993
      )
    )
  )
})

答案1

得分: 2

以下是翻译好的内容:

使用 `map()` 和 `list_rbind()`:

library(tidyverse)

# Keep the three columns of interest from every list element (renamed),
# stack the pieces, then sort by the row's position within its feature so the
# first row of every feature comes first, then every second row, and so on.
df %>%
  map(~ select(.x, Name = .feature, Borders = .borders, Value = .value)) %>%
  list_rbind() %>%
  group_by(Name) %>%
  mutate(row_order = row_number()) %>% # position of the row within its feature
  ungroup() %>%
  arrange(row_order) %>%
  select(-row_order)
# 一个数据框:118 行 × 3 列
   Name  Borders  Value
   <chr>   <dbl>  <dbl>
 1 ECe     0.491 -0.866
 2 SOC     0.261 -1.13 
 3 BD      0.958  2.88 
 4 Sand    4.00   5.72 
 5 Silt    0     -1.81 
 6 Clay    0     -1.02 
 7 ECe     2.68  -0.812
 8 SOC     0.330 -1.14 
 9 BD      1.14   2.90 
10 Sand    8.00   5.05 
# … 还有 108 行

如果顺序不重要,您可以使用以下方式:

# Same stacking step without the re-ordering: rows stay grouped by feature.
df %>%
  map(~ select(.x, Name = .feature, Borders = .borders, Value = .value)) %>%
  list_rbind()
英文:

Use map() and list_rbind():

library(tidyverse)
# Keep the three columns of interest from every list element (renamed),
# stack the pieces, then sort by the row's position within its feature so the
# first row of every feature comes first, then every second row, and so on.
map(df, ~ select(.x, Name = .feature, Borders = .borders, Value = .value)) |>
  list_rbind() |>
  group_by(Name) |>
  mutate(row_order = row_number()) |> # position of the row within its feature
  ungroup() |>
  arrange(row_order) |>
  select(-row_order)
# A tibble: 118 × 3
   Name  Borders  Value
   <chr>   <dbl>  <dbl>
1 ECe     0.491 -0.866
2 SOC     0.261 -1.13 
3 BD      0.958  2.88 
4 Sand    4.00   5.72 
5 Silt    0     -1.81 
6 Clay    0     -1.02 
7 ECe     2.68  -0.812
8 SOC     0.330 -1.14 
9 BD      1.14   2.90 
10 Sand    8.00   5.05 
# … with 108 more rows

If the order does not matter, you can just go with:

# Same stacking step without the re-ordering: rows stay grouped by feature.
map(df, ~ select(.x, Name = .feature, Borders = .borders, Value = .value)) |>
  list_rbind()

答案2

得分: 2

Here is the translated content you requested:

更新(应 OP 在评论中的请求):

要添加 `.value` 列:

# Build one small data frame per list element (its name, `.borders` and
# `.value`) and row-bind them all into a single table.
imap_dfr(
  df,
  function(piece, feature) {
    data.frame(
      name = feature,
      Borders = piece[[".borders"]],
      value = piece[[".value"]]
    )
  }
)

   name  Borders   value
   <chr>   <dbl>   <dbl>
 1 ECe     0.491 -0.866 
 2 ECe     2.68  -0.812 
 3 ECe     4.99  -0.750 
 4 ECe     7.99  -0.501 
 5 ECe     8.80  -0.473 
 6 ECe    11.1   -0.365 
 7 ECe    14.1   -0.352 
 8 ECe    15.9   -0.173 
 9 ECe    17.8   -0.0685
10 ECe    18.8    0.0693
# … with 108 more rows

第一个答案
为了解决您遇到的错误:

  1. 使用 ``purrr::transpose() %>% `$`(.borders)`` 会生成一个由 6 个长度不同的向量组成的列表。
    然后您尝试用 `bind_rows` 把它们按列合并成一个 tibble,于是出现如下错误:
```
! Tibble columns must have compatible sizes.
• Size 18: Columns `BD` and `Clay`.
• Size 20: Columns `Sand` and `Silt`.
• Size 21: Columns `ECe` and `SOC`.
ℹ Only values of size one are recycled.
```
要实现您想要的操作,您可以使用 `purrr::map_df()` 或其变体 `purrr::imap_dfr()`:
这是使用 `map_df` 的版本:
```R
library(purrr)
library(dplyr)
# Stack the `.borders` column of every list element; `.id = "name"` adds a
# column holding the name of the list element each row came from.
map_df(df, ~ select(.x, .borders), .id = "name")

或者

library(purrr)
library(dplyr)

# Build one small data frame per list element (its name and `.borders`) and
# row-bind them all into a single table.
imap_dfr(
  df,
  function(piece, feature) {
    data.frame(name = feature, Borders = piece[[".borders"]])
  }
)
    name    Borders
1    ECe  0.4908060
2    ECe  2.6800000
3    ECe  4.9900000
4    ECe  7.9940000
5    ECe  8.8010000
6    ECe 11.1400000
7    ECe 14.0900000
8    ECe 15.9000000
9    ECe 17.8100000
10   ECe 18.7500000
11   ECe 20.1900000
12   ECe 21.0200000
13   ECe 22.9500000
14   ECe 24.5200000
15   ECe 28.2900000
16   ECe 31.6400000
17   ECe 37.9900000
18   ECe 40.7000000
19   ECe 45.1172000
20   ECe 53.0900000
21   ECe 59.7192000
22   SOC  0.2608696
23   SOC  0.3301435
24   SOC  0.4227273
25   SOC  0.5454545
26   SOC  0.6315789
27   SOC  0.6869159
28   SOC  0.7636364
29   SOC  0.8026316
30   SOC  0.8285714
31   SOC  0.8899522
32   SOC  0.9672897
33   SOC  1.0227273
34   SOC  1.0909091
35   SOC  1.2368421
36   SOC  1.4052045
37   SOC  1.5272727
38   SOC  1.8086124
39   SOC  1.9090909
40   SOC  2.2009346
41   SOC  2.4976744
42   SOC  2.6651163
43    BD  0.9577102
44    BD  1.1385478
45    BD  1.1852229
46    BD  1.2205860
47    BD  1.2993001
48    BD  1.3397197
49    BD  1.4012110
50    BD  1.4863883
51    BD  1.5000000
52    BD  1.5242803
53    BD  1.6371975
54    BD  1.6518361
55    BD  1.7040510
56    BD  1.8002548
57    BD  1.9836125
58    BD  2.0758355
59    BD  2.1704490
60    BD  2.2203979
61  Sand  4.0000000
62  Sand  8.0000000
63  Sand 12.0000000
64  Sand 20.0000000
65  Sand 24.0000000
66  Sand 28.0000000
67  Sand 32.0000000
68  Sand 40.0000000
69  Sand 44.0000000
70  Sand 48.0000000
71  Sand 52.0000000
72  Sand 60.0000000
73  Sand 64.0000000
74  Sand 64.0000000
75  Sand 68.0000000
76  Sand 72.0000000
77  Sand 76.0000000
78  Sand 80.0000000
79  Sand 84.0000000
80  Sand 88.0000000
81  Silt  0.0000000
82  Silt  4.0000000
83  Silt  8.0000000
84  Silt  8.0000000
85  Silt  8.0000000
86  Silt 12.0000000
87  Silt 16.0000000
88  Silt 16.0000000
89  Silt 20.0000000
90  Silt 24.0000000
91  Silt 24.0000000
92  Silt 28.0000000
93  Silt 28.0000000
# …(输出在此处被截断,其余行见下方英文原文中的完整输出)

<details>
<summary>英文:</summary>

**Update** (at the OP's request; see the comments):

To add the `.value` column:

# Build one small data frame per list element (its name, `.borders` and
# `.value`) and row-bind them all into a single table.
imap_dfr(
  df,
  function(piece, feature) {
    data.frame(
      name = feature,
      Borders = piece[[".borders"]],
      value = piece[[".value"]]
    )
  }
)

   name  Borders   value
   <chr>   <dbl>   <dbl>
 1 ECe     0.491 -0.866 
 2 ECe     2.68  -0.812 
 3 ECe     4.99  -0.750 
 4 ECe     7.99  -0.501 
 5 ECe     8.80  -0.473 
 6 ECe    11.1   -0.365 
 7 ECe    14.1   -0.352 
 8 ECe    15.9   -0.173 
 9 ECe    17.8   -0.0685
10 ECe    18.8    0.0693
# … with 108 more rows


**First answer:** 
To address the error you get: 
1. With ``purrr::transpose() %>% `$`(.borders)`` you create a list of 6 vectors of different lengths.
   You then try to combine them column-wise into a tibble with `bind_rows()`, and the error says:

```
! Tibble columns must have compatible sizes.
• Size 18: Columns `BD` and `Clay`.
• Size 20: Columns `Sand` and `Silt`.
• Size 21: Columns `ECe` and `SOC`.
ℹ Only values of size one are recycled.
```

To get what you want to do, you could either use purrr::map_df() or its variant purrr::imap_dfr():

Here is a version using map_df:

library(purrr)
library(dplyr)
# Stack the `.borders` column of every list element; `.id = "name"` adds a
# column holding the name of the list element each row came from.
map_df(df, ~ select(.x, .borders), .id = "name")

OR

library(purrr)
library(dplyr)
# Build one small data frame per list element (its name and `.borders`) and
# row-bind them all into a single table.
imap_dfr(
  df,
  function(piece, feature) {
    data.frame(name = feature, Borders = piece[[".borders"]])
  }
)
    name    Borders
1    ECe  0.4908060
2    ECe  2.6800000
3    ECe  4.9900000
4    ECe  7.9940000
5    ECe  8.8010000
6    ECe 11.1400000
7    ECe 14.0900000
8    ECe 15.9000000
9    ECe 17.8100000
10   ECe 18.7500000
11   ECe 20.1900000
12   ECe 21.0200000
13   ECe 22.9500000
14   ECe 24.5200000
15   ECe 28.2900000
16   ECe 31.6400000
17   ECe 37.9900000
18   ECe 40.7000000
19   ECe 45.1172000
20   ECe 53.0900000
21   ECe 59.7192000
22   SOC  0.2608696
23   SOC  0.3301435
24   SOC  0.4227273
25   SOC  0.5454545
26   SOC  0.6315789
27   SOC  0.6869159
28   SOC  0.7636364
29   SOC  0.8026316
30   SOC  0.8285714
31   SOC  0.8899522
32   SOC  0.9672897
33   SOC  1.0227273
34   SOC  1.0909091
35   SOC  1.2368421
36   SOC  1.4052045
37   SOC  1.5272727
38   SOC  1.8086124
39   SOC  1.9090909
40   SOC  2.2009346
41   SOC  2.4976744
42   SOC  2.6651163
43    BD  0.9577102
44    BD  1.1385478
45    BD  1.1852229
46    BD  1.2205860
47    BD  1.2993001
48    BD  1.3397197
49    BD  1.4012110
50    BD  1.4863883
51    BD  1.5000000
52    BD  1.5242803
53    BD  1.6371975
54    BD  1.6518361
55    BD  1.7040510
56    BD  1.8002548
57    BD  1.9836125
58    BD  2.0758355
59    BD  2.1704490
60    BD  2.2203979
61  Sand  4.0000000
62  Sand  8.0000000
63  Sand 12.0000000
64  Sand 20.0000000
65  Sand 24.0000000
66  Sand 28.0000000
67  Sand 32.0000000
68  Sand 40.0000000
69  Sand 44.0000000
70  Sand 48.0000000
71  Sand 52.0000000
72  Sand 60.0000000
73  Sand 64.0000000
74  Sand 64.0000000
75  Sand 68.0000000
76  Sand 72.0000000
77  Sand 76.0000000
78  Sand 80.0000000
79  Sand 84.0000000
80  Sand 88.0000000
81  Silt  0.0000000
82  Silt  4.0000000
83  Silt  8.0000000
84  Silt  8.0000000
85  Silt  8.0000000
86  Silt 12.0000000
87  Silt 16.0000000
88  Silt 16.0000000
89  Silt 20.0000000
90  Silt 24.0000000
91  Silt 24.0000000
92  Silt 28.0000000
93  Silt 28.0000000
94  Silt 32.0000000
95  Silt 36.0000000
96  Silt 40.0000000
97  Silt 44.0000000
98  Silt 48.0000000
99  Silt 64.0000000
100 Silt 80.0000000
101 Clay  0.0000000
102 Clay  8.0000000
103 Clay 12.0000000
104 Clay 12.0000000
105 Clay 16.0000000
106 Clay 16.0000000
107 Clay 20.0000000
108 Clay 24.0000000
109 Clay 24.0000000
110 Clay 28.0000000
111 Clay 32.0000000
112 Clay 36.0000000
113 Clay 40.0000000
114 Clay 40.0000000
115 Clay 48.0000000
116 Clay 52.0000000
117 Clay 56.0000000
118 Clay 68.0000000

huangapple
  • 本文由 发表于 2023年5月14日 17:10:56
  • 转载请务必保留本文链接:https://go.coder-hub.com/76246684.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定