如何在R中同时从两列中抽样?

huangapple go评论72阅读模式
英文:

How to sample simultaneously from two columns in R?

问题

我试图从df_sample中为df抽取param1和param2的样本。我尝试按照其他地方的建议使用setDT函数来传入两列,但输出只识别了第一个param1。有什么修复此问题的建议吗?

注意:param1和param2的值应独立抽样。例如,在site 1、quad 1中,目前得到的要么是5.57和499.04这一对,要么是3.56和499.37这一对,但理想情况下这些值也应该可以互换。因此,我希望对param1和param2分别独立抽样。

# Data frames
set.seed(501)

# Target table: one row per month/site/quad combination (25 rows).
month <- rep(c("J", "J", "J", "F", "M"), each = 5)
site <- rep(c("1", "2", "3", "1", "2"), each = 5)
# `times` is the argument that repeats the whole vector. The original
# `rep = 5` is not an argument of rep() and was silently ignored, so the
# data frame only reached 25 rows through recycling.
quad <- rep(c("1", "2", "3", "4", "5"), times = 5)

df <- data.frame(month, site, quad)

# Sampling pool: 60 observations of param1/param2, labelled by site and quad.
site <- rep(c("1", "2", "3"), each = 20)
quad <- c(
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "2", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "1", "3", "4", "5", "1", "1", "3", "4", "5"
)
param1 <- rnorm(60, 5, 1)
param2 <- rnorm(60, 500, 1)

df_sample <- data.frame(site, quad, param1, param2)

library(dplyr)
library(data.table)

# Attempt from the question: collect the values per site/quad, join them onto
# `df`, then draw one value per row.
# NOTE(review): `param = list(param1, param2)` packs both vectors into a single
# list column named `param`, and the final step overwrites only that column,
# so the result can never contain separate param1 and param2 columns. This is
# the behaviour the question asks about; it is intentionally left unchanged.
df <- setDT(df_sample)[, list(param = list(param1, param2)), by = list(site, quad)][
  setDT(df),
  on = c("site", "quad")][, param := sapply(param, sample, 1)][]
英文:

I am trying to draw samples of param1 and param2 from df_sample into df. I tried to use the setDT function, as suggested elsewhere, to input two columns, but the output only recognizes the first one, param1. Any suggestions on how to fix this?

NOTE: The param1 and param2 values should be sampled independently.
For example, in site 1 quad 1, the result is currently either the 5.57 and 499.04 pair or the 3.56 and 499.37 pair, but ideally those values could be swapped as well. So I'm looking to sample param1 and param2 independently.

# DATA FRAMES
set.seed(501)

# Target table: one row per month/site/quad combination (25 rows).
month <- rep(c("J", "J", "J", "F", "M"), each = 5)
site <- rep(c("1", "2", "3", "1", "2"), each = 5)
# `times` is the argument that repeats the whole vector. The original
# `rep = 5` is not an argument of rep() and was silently ignored, so the
# data frame only reached 25 rows through recycling.
quad <- rep(c("1", "2", "3", "4", "5"), times = 5)

df <- data.frame(month, site, quad)

# Sampling pool: 60 observations of param1/param2, labelled by site and quad.
site <- rep(c("1", "2", "3"), each = 20)
quad <- c(
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "2", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "2", "3", "4", "5", "1", "2", "3", "4", "5",
  "1", "1", "3", "4", "5", "1", "1", "3", "4", "5"
)
param1 <- rnorm(60, 5, 1)
param2 <- rnorm(60, 500, 1)

df_sample <- data.frame(site, quad, param1, param2)

library(dplyr)
library(data.table)

# Attempt from the question: collect the values per site/quad, join them onto
# `df`, then draw one value per row.
# NOTE(review): `param = list(param1, param2)` packs both vectors into a single
# list column named `param`, and the final step overwrites only that column,
# so the result can never contain separate param1 and param2 columns. This is
# the behaviour the question asks about; it is intentionally left unchanged.
df <- setDT(df_sample)[, list(param = list(param1, param2)), by = list(site, quad)][
  setDT(df),
  on = c("site", "quad")][, param := sapply(param, sample, 1)][]

答案1

得分: 2

你可以像下面这样做:

library(dplyr) # 1.1.0 or above needed for `.by`
set.seed(1)

# Number the rows within each site/quad group (`id`) and record the size of
# each group (`max`).
df_sample <-
  df_sample %>%
  mutate(
    id = row_number(),
    max = max(id),
    .by = c(site, quad)
  )

# For every row of `df`, draw one row index between 1 and the group size, then
# pull that row's values from `df_sample`.
# Note: param1 and param2 are taken from the same sampled row, so they stay
# paired rather than being drawn independently.
df %>%
  # Explicit keys instead of relying on the natural join over shared columns.
  left_join(distinct(df_sample, site, quad, max), by = c("site", "quad")) %>%
  # vapply() guarantees exactly one integer per row; sapply() is not
  # type-stable.
  mutate(id = vapply(max, sample, integer(1), size = 1)) %>%
  left_join(df_sample, by = c("site", "quad", "max", "id"))
   month site quad max id   param1   param2
1      J    1    1   4  1 5.577281 499.0493
2      J    1    2   4  4 4.813692 499.5953
3      J    1    3   4  3 5.237906 500.0975
4      J    1    4   4  1 5.225781 500.9566
5      J    1    5   4  2 6.223762 501.5960
6      J    2    1   3  1 6.151618 500.9271
7      J    2    2   5  3 5.622492 499.7996
8      J    2    3   4  2 4.479705 501.0895
9      J    2    4   4  2 3.680826 500.7056
10     J    2    5   4  3 6.491763 498.2079
11     J    3    1   6  3 6.708715 498.4764
12     J    3    2   2  1 5.360314 499.2427
13     J    3    3   4  1 6.091806 499.8522
14     J    3    4   4  1 5.515952 500.7180
15     J    3    5   4  2 7.633465 499.8845
16     F    1    1   4  2 3.566631 499.3791
17     F    1    2   4  2 3.351793 500.1388
18     F    1    3   4  2 5.035623 500.4443
19     F    1    4   4  3 5.465357 499.9017
20     F    1    5   4  1 4.155195 501.7882
21     M    2    1   3  3 5.519431 499.2223
22     M    2    2   5  5 4.725804 500.7145
23     M    2    3   4  1 5.151815 499.6472
24     M    2    4   4  1 5.455574 500.3809
25     M    2    5   4  1 4.721432 499.4773
英文:

You can do something like the following:

library(dplyr) # 1.1.0 or above needed for `.by`
set.seed(1)

# Number the rows within each site/quad group (`id`) and record the size of
# each group (`max`).
df_sample <-
  df_sample %>%
  mutate(
    id = row_number(),
    max = max(id),
    .by = c(site, quad)
  )

# For every row of `df`, draw one row index between 1 and the group size, then
# pull that row's values from `df_sample`.
# Note: param1 and param2 are taken from the same sampled row, so they stay
# paired rather than being drawn independently.
df %>%
  # Explicit keys instead of relying on the natural join over shared columns.
  left_join(distinct(df_sample, site, quad, max), by = c("site", "quad")) %>%
  # vapply() guarantees exactly one integer per row; sapply() is not
  # type-stable.
  mutate(id = vapply(max, sample, integer(1), size = 1)) %>%
  left_join(df_sample, by = c("site", "quad", "max", "id"))
   month site quad max id   param1   param2
1      J    1    1   4  1 5.577281 499.0493
2      J    1    2   4  4 4.813692 499.5953
3      J    1    3   4  3 5.237906 500.0975
4      J    1    4   4  1 5.225781 500.9566
5      J    1    5   4  2 6.223762 501.5960
6      J    2    1   3  1 6.151618 500.9271
7      J    2    2   5  3 5.622492 499.7996
8      J    2    3   4  2 4.479705 501.0895
9      J    2    4   4  2 3.680826 500.7056
10     J    2    5   4  3 6.491763 498.2079
11     J    3    1   6  3 6.708715 498.4764
12     J    3    2   2  1 5.360314 499.2427
13     J    3    3   4  1 6.091806 499.8522
14     J    3    4   4  1 5.515952 500.7180
15     J    3    5   4  2 7.633465 499.8845
16     F    1    1   4  2 3.566631 499.3791
17     F    1    2   4  2 3.351793 500.1388
18     F    1    3   4  2 5.035623 500.4443
19     F    1    4   4  3 5.465357 499.9017
20     F    1    5   4  1 4.155195 501.7882
21     M    2    1   3  3 5.519431 499.2223
22     M    2    2   5  5 4.725804 500.7145
23     M    2    3   4  1 5.151815 499.6472
24     M    2    4   4  1 5.455574 500.3809
25     M    2    5   4  1 4.721432 499.4773

huangapple
  • 本文由 发表于 2023年3月3日 20:23:10
  • 转载请务必保留本文链接:https://go.coder-hub.com/75627031.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定