英文:
R code to identify whether individuals in groups have previously been in groups together
问题
以下是您提供的内容的翻译:
我有一个数据集,记录了个体随时间推移所属的不同群体:这些群体有时由相同的成员组成,有时会有新成员加入。
我想创建一个变量,用来统计群体中的每个个体此前与当前群体中任一成员同组的次数,并利用日期信息确保按时间先后计数(即不把发生在未来的群体中的共同成员关系计算在内)。
示例数据:
# Example membership records: one row per individual per group.
# Rows are listed group by group, not in date order (group 44 is the earliest).
SampleData <- tribble(
  ~ID,   ~GROUP_NUM, ~Date,
  # Group 22
  "abc", 22, "2022-01-15",
  "def", 22, "2022-01-15",
  "ghi", 22, "2022-01-15",
  "jkl", 22, "2022-01-15",
  # Group 14
  "abc", 14, "2022-02-19",
  "mno", 14, "2022-02-19",
  "pqr", 14, "2022-02-19",
  "stv", 14, "2022-02-19",
  # Group 18
  "abc", 18, "2022-05-11",
  "stv", 18, "2022-05-11",
  "wxy", 18, "2022-05-11",
  "zzz", 18, "2022-05-11",
  # Group 35
  "abc", 35, "2022-10-06",
  "def", 35, "2022-10-06",
  "pqr", 35, "2022-10-06",
  "bbb", 35, "2022-10-06",
  # Group 44
  "abc", 44, "2021-04-14",
  "stv", 44, "2021-04-14",
  "pqr", 44, "2021-04-14",
  "bbb", 44, "2021-04-14"
)
这是我想要的输出:
# Desired result: the sample records plus NPrevKnown, the number of earlier
# shared group memberships with members of the row's current group.
AimedData <- tribble(
  ~ID,   ~GROUP_NUM, ~Date,        ~NPrevKnown,
  # Group 22
  "abc", 22, "2022-01-15", 0,
  "def", 22, "2022-01-15", 0,
  "ghi", 22, "2022-01-15", 0,
  "jkl", 22, "2022-01-15", 0,
  # Group 14
  "abc", 14, "2022-02-19", 2,
  "mno", 14, "2022-02-19", 0,
  "pqr", 14, "2022-02-19", 2,
  "stv", 14, "2022-02-19", 2,
  # Group 18
  "abc", 18, "2022-05-11", 2,
  "stv", 18, "2022-05-11", 2,
  "wxy", 18, "2022-05-11", 0,
  "zzz", 18, "2022-05-11", 0,
  # Group 35
  "abc", 35, "2022-10-06", 4,
  "def", 35, "2022-10-06", 1,
  "pqr", 35, "2022-10-06", 3,
  "bbb", 35, "2022-10-06", 2,
  # Group 44 (earliest date, so nobody has prior history)
  "abc", 44, "2021-04-14", 0,
  "stv", 44, "2021-04-14", 0,
  "pqr", 44, "2021-04-14", 0,
  "bbb", 44, "2021-04-14", 0
)
英文:
I have a dataset of individuals who belong to different groups over time, sometimes with the same group members, sometimes with new members.
I'm trying to create a variable which displays a count of the number of times each individual in a group has been in an earlier group with any of the current group members, using the Date information to make sure the count is in chronological order (i.e. not counting shared membership from groups in the future).
Example data:
# Example membership records: one row per individual per group.
# Rows are listed group by group, not in date order (group 44 is the earliest).
SampleData <- tribble(
  ~ID,   ~GROUP_NUM, ~Date,
  # Group 22
  "abc", 22, "2022-01-15",
  "def", 22, "2022-01-15",
  "ghi", 22, "2022-01-15",
  "jkl", 22, "2022-01-15",
  # Group 14
  "abc", 14, "2022-02-19",
  "mno", 14, "2022-02-19",
  "pqr", 14, "2022-02-19",
  "stv", 14, "2022-02-19",
  # Group 18
  "abc", 18, "2022-05-11",
  "stv", 18, "2022-05-11",
  "wxy", 18, "2022-05-11",
  "zzz", 18, "2022-05-11",
  # Group 35
  "abc", 35, "2022-10-06",
  "def", 35, "2022-10-06",
  "pqr", 35, "2022-10-06",
  "bbb", 35, "2022-10-06",
  # Group 44
  "abc", 44, "2021-04-14",
  "stv", 44, "2021-04-14",
  "pqr", 44, "2021-04-14",
  "bbb", 44, "2021-04-14"
)
Here's my desired output:
# Desired result: the sample records plus NPrevKnown, the number of earlier
# shared group memberships with members of the row's current group.
AimedData <- tribble(
  ~ID,   ~GROUP_NUM, ~Date,        ~NPrevKnown,
  # Group 22
  "abc", 22, "2022-01-15", 0,
  "def", 22, "2022-01-15", 0,
  "ghi", 22, "2022-01-15", 0,
  "jkl", 22, "2022-01-15", 0,
  # Group 14
  "abc", 14, "2022-02-19", 2,
  "mno", 14, "2022-02-19", 0,
  "pqr", 14, "2022-02-19", 2,
  "stv", 14, "2022-02-19", 2,
  # Group 18
  "abc", 18, "2022-05-11", 2,
  "stv", 18, "2022-05-11", 2,
  "wxy", 18, "2022-05-11", 0,
  "zzz", 18, "2022-05-11", 0,
  # Group 35
  "abc", 35, "2022-10-06", 4,
  "def", 35, "2022-10-06", 1,
  "pqr", 35, "2022-10-06", 3,
  "bbb", 35, "2022-10-06", 2,
  # Group 44 (earliest date, so nobody has prior history)
  "abc", 44, "2021-04-14", 0,
  "stv", 44, "2021-04-14", 0,
  "pqr", 44, "2021-04-14", 0,
  "bbb", 44, "2021-04-14", 0
)
答案1
得分: 0
效率不是最高,但这个方法应该可行!
# Example membership records: one row per individual per group.
SampleData <- dplyr::tribble(
  ~ID,   ~GROUP_NUM, ~Date,
  "abc", 22, "2022-01-15",
  "def", 22, "2022-01-15",
  "ghi", 22, "2022-01-15",
  "jkl", 22, "2022-01-15",
  "abc", 14, "2022-02-19",
  "mno", 14, "2022-02-19",
  "pqr", 14, "2022-02-19",
  "stv", 14, "2022-02-19",
  "abc", 18, "2022-05-11",
  "stv", 18, "2022-05-11",
  "wxy", 18, "2022-05-11",
  "zzz", 18, "2022-05-11",
  "abc", 35, "2022-10-06",
  "def", 35, "2022-10-06",
  "pqr", 35, "2022-10-06",
  "bbb", 35, "2022-10-06",
  "abc", 44, "2021-04-14",
  "stv", 44, "2021-04-14",
  "pqr", 44, "2021-04-14",
  "bbb", 44, "2021-04-14"
)

# Parse the text dates so that `<` below compares Date values, not strings.
SampleData$Date <- as.Date(SampleData$Date)

AimedData <- SampleData

# IDs, other than `id`, recorded with the given GROUP_NUM and Date in `data`.
# `which()` drops NA comparisons, so incomplete rows never match.
session_companions <- function(data, id, group_num, date) {
  data$ID[which(
    data$GROUP_NUM == group_num &
      data$Date == date &
      data$ID != id
  )]
}

# Companions of the individual in row `row` of `data`.
#
# Returns a list with:
#   current  - unique IDs sharing the row's GROUP_NUM and Date;
#   previous - IDs from every group the individual was in on a strictly
#              earlier Date, one entry per shared group (repeats are kept,
#              which is what makes the count a number of shared memberships).
# `previous` is empty when the individual is alone in the current group;
# the original code left the variable undefined in that case and failed.
find_companions <- function(data, row) {
  id <- data$ID[row]
  no_ids <- data$ID[0]
  current <- unique(
    session_companions(data, id, data$GROUP_NUM[row], data$Date[row])
  )
  if (length(current) == 0L) {
    return(list(current = current, previous = no_ids))
  }

  # Row indices, not a data-frame subset: length() of a data frame counts
  # columns, so it cannot be used to test for "no earlier groups".
  earlier_rows <- which(data$Date < data$Date[row] & data$ID == id)
  if (length(earlier_rows) == 0L) {
    return(list(current = current, previous = no_ids))
  }

  previous <- unlist(
    lapply(earlier_rows, function(i) {
      session_companions(data, id, data$GROUP_NUM[i], data$Date[i])
    }),
    use.names = FALSE
  )
  list(current = current, previous = previous)
}

# Number of earlier shared memberships with members of the current group.
count_prev_known <- function(data, row) {
  pals <- find_companions(data, row)
  sum(pals$previous %in% pals$current)
}

# All earlier companions (not only those in the current group), joined
# with " | "; "" when there are none.
list_prev_known <- function(data, row) {
  paste0(find_companions(data, row)$previous, collapse = " | ")
}

# seq_len() keeps an empty data set from iterating over c(1, 0).
row_ids <- seq_len(nrow(AimedData))

AimedData$NPrevKnown <- vapply(
  row_ids,
  function(row) count_prev_known(AimedData, row),
  integer(1)
)

AimedData$WhoPrevKnown <- vapply(
  row_ids,
  function(row) list_prev_known(AimedData, row),
  character(1)
)
希望这对您有所帮助。
英文:
Not the most efficient but this might work!
# Example membership records: one row per individual per group.
SampleData <- dplyr::tribble(
  ~ID,   ~GROUP_NUM, ~Date,
  "abc", 22, "2022-01-15",
  "def", 22, "2022-01-15",
  "ghi", 22, "2022-01-15",
  "jkl", 22, "2022-01-15",
  "abc", 14, "2022-02-19",
  "mno", 14, "2022-02-19",
  "pqr", 14, "2022-02-19",
  "stv", 14, "2022-02-19",
  "abc", 18, "2022-05-11",
  "stv", 18, "2022-05-11",
  "wxy", 18, "2022-05-11",
  "zzz", 18, "2022-05-11",
  "abc", 35, "2022-10-06",
  "def", 35, "2022-10-06",
  "pqr", 35, "2022-10-06",
  "bbb", 35, "2022-10-06",
  "abc", 44, "2021-04-14",
  "stv", 44, "2021-04-14",
  "pqr", 44, "2021-04-14",
  "bbb", 44, "2021-04-14"
)

# Parse the text dates so that `<` below compares Date values, not strings.
SampleData$Date <- as.Date(SampleData$Date)

AimedData <- SampleData

# IDs, other than `id`, recorded with the given GROUP_NUM and Date in `data`.
# `which()` drops NA comparisons, so incomplete rows never match.
session_companions <- function(data, id, group_num, date) {
  data$ID[which(
    data$GROUP_NUM == group_num &
      data$Date == date &
      data$ID != id
  )]
}

# Companions of the individual in row `row` of `data`.
#
# Returns a list with:
#   current  - unique IDs sharing the row's GROUP_NUM and Date;
#   previous - IDs from every group the individual was in on a strictly
#              earlier Date, one entry per shared group (repeats are kept,
#              which is what makes the count a number of shared memberships).
# `previous` is empty when the individual is alone in the current group;
# the original code left the variable undefined in that case and failed.
find_companions <- function(data, row) {
  id <- data$ID[row]
  no_ids <- data$ID[0]
  current <- unique(
    session_companions(data, id, data$GROUP_NUM[row], data$Date[row])
  )
  if (length(current) == 0L) {
    return(list(current = current, previous = no_ids))
  }

  # Row indices, not a data-frame subset: length() of a data frame counts
  # columns, so it cannot be used to test for "no earlier groups".
  earlier_rows <- which(data$Date < data$Date[row] & data$ID == id)
  if (length(earlier_rows) == 0L) {
    return(list(current = current, previous = no_ids))
  }

  previous <- unlist(
    lapply(earlier_rows, function(i) {
      session_companions(data, id, data$GROUP_NUM[i], data$Date[i])
    }),
    use.names = FALSE
  )
  list(current = current, previous = previous)
}

# Number of earlier shared memberships with members of the current group.
count_prev_known <- function(data, row) {
  pals <- find_companions(data, row)
  sum(pals$previous %in% pals$current)
}

# All earlier companions (not only those in the current group), joined
# with " | "; "" when there are none.
list_prev_known <- function(data, row) {
  paste0(find_companions(data, row)$previous, collapse = " | ")
}

# seq_len() keeps an empty data set from iterating over c(1, 0).
row_ids <- seq_len(nrow(AimedData))

AimedData$NPrevKnown <- vapply(
  row_ids,
  function(row) count_prev_known(AimedData, row),
  integer(1)
)

AimedData$WhoPrevKnown <- vapply(
  row_ids,
  function(row) list_prev_known(AimedData, row),
  character(1)
)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论