英文:
Grouping Semesters into Academic Years generalization
问题
我关于我的代码的特定部分有一些疑问。这个循环输入学期文件,计算新的列,并输出带有新变量的数据集。循环运行得很好,但Acad_Year
变量是静态的,我正在寻找一种方法,使它更灵活,这样每次有新的数据集时,我就不需要去重新编写case_when
语句。示例数据可用。谢谢您的帮助!
require("knitr")
setwd("~/Downloads/Stack Overflow/")
library(dplyr)
library(tidyr)
library(writexl)
PhGrad <- rbind(PhGrad_08, PhGrad_SP_23) %>%
filter(!BannerID== "")
d <- tibble(
filename = list.files(),
Sem = gsub(".*(Fall|Spring|Summer).*", "//1", filename),
Year = gsub(".*(//d{2}).*", "//1", filename),
grp = gsub(".*(ASPH|ID).*", "//1", filename)) %>%
pivot_wider(names_from = "grp", values_from="filename")
res <- vector(mode="list", length=nrow(d))
names(res) <- paste(d$Sem, d$Year, sep="_")
for(i in seq_along(res)){
ASPH <- rio::import(d$ASPH[i])
ID <- rio::import(d$ID[i])
res[[i]] <- bind_rows(ASPH, ID) %>%
distinct(ID, Program, .keep_all = T) %>%
rowwise() %>%
mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== "Y", na.rm=T)) %>%
ungroup() %>%
mutate(racecode= case_when(Citizenship %in% list("NN", "NV") ~ "foreign_national",
`Race- Hispanic`== "Y" ~ "hispanic_latino",
racecount >1 ~ "two_or_more_races",
`Race-Am Ind`== "Y" ~ "american_indian_alaskan_native",
`Race- Asian`== "Y" ~ "asian",
`Race-Afr Amer`== "Y" ~ "black_african_american",
`Race- Hawaiian` == "Y" ~ "native_hawaiian_pacific_islander",
`Race- Caucasian`== "Y" ~ "white",
`Race-Not Rept`== "Y" ~ "race_unknown",
TRUE~ "race_unknown"),
gender_long= case_when(Gender== "F"~ "Female",
Gender== "M"~ "Male",
Gender== "N"~ "Other",
TRUE~ "other"),
DEPT= case_when(Program %in% list("3GPH363AMS", "3GPH363AMSP", "3GPH378AMCD", "3GPH378AMS", "3GPH379APHD")~ "COMD",
Program %in% list("3GPH593AMPH", "3GPH593AMS", "3GPH593APHD", "3GPH569ACGS")~ "ENHS",
Program %in% list("3GPH596AMS", "3GPH596AMSPH", "3GPH596APHD","3GPH594AMPH", "3GPH594AMS", "3GPH594AMSPH", "3GPH594APHD", "3GPH586APBAC")~ "EPID/BIOS",
Program %in% list("3GPH331AMS","3GPH331APHD","3GPH334AMS","3GPH335ADPT", "3GPH377AMS", "3GPH388AMS", "3GPH588AMPH", "3GPHJ331MS", "3UPH331ABS")~ "EXSC",
Program %in% list("3GPH568APBAC","3GPH592ACGS","3GPH592AMPH", "3GPH592APHD", "3GPH576ACGS", "3GPH121ACGS", "3GID635ACGS")~ "HPEB",
Program %in% list("3GPH591AMPH", "3GPH591APHD", "3GPH597AMHA","3GPH591ADPH")~ "HSPM",
TRUE~ "Missing"),
degree_delivery_type= case_when(`First Concentration`== "R999" | `Second Concentration`== "R999" ~ "Distance-based",
`First Concentration`== "3853" | `Second Concentration`== "3853" ~ "Executive",
TRUE~ "Campus-based"),
Sem_Year= paste0(d$Sem[i],"_",d$Year[i]),
StudentCount= 1,
Acad_Year= case_when(Sem_Year %in% list("Fall_18", "Spring_19", "Summer_19")~ "AY2018-19",
Sem_Year %in% list("Fall_19", "Spring_20", "Summer_20")~ "AY2019-20",
Sem_Year %in% list("Fall_20", "Spring_21", "Summer_21")~ "AY2020-21",
Sem_Year %in% list("Fall_21", "Spring_22", "Summer_22")~ "AY2021-22",
Sem_Year %in% list("Fall_22", "Spring_23")~ "AY2022-23"),
Deg_group = case_when(Degree %in% list("DPT", "PHD", "DPH")~ "Doctorate",
Degree %in% list("MSP", "MCD", "MPH", "MHA", "MS","MSPH")~ "Masters",
Degree %in% list("CGS", "PBACC")~ "Certificate")) %>%
left_join(., PhGrad %>% mutate_at(vars(BannerID), ~as.character(.)), by= c("ID"="BannerID", "DEPT"), unmatched= "drop", relationship= "many-to-many") %>%
mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
is.na(Degree.y)== F~ Degree.y,
TRUE~ "Error")) %>%
select(-c(ApplicationID:StudentStatus))
}
英文:
I have about a specific section of my code. The loop inputs semester files, computes new columns and outputs a data set with the new variables. The loop works beautifully, however making the Acad_Year
variable is stagnant, I am looking for a way to make it more flexible so that I won't need to go in and re-write the case_when
statement every time there is a new dataset. Sample data is available. Thank you in advance!
{r setup}
require("knitr")
setwd("~/Downloads/Stack Overflow/")
library(dplyr)
library(tidyr)
library(writexl)
PhGrad <- rbind(PhGrad_08, PhGrad_SP_23) %>%
filter(!BannerID== "")
d <- tibble(
filename = list.files(),
Sem = gsub(".*(Fall|Spring|Summer).*", "//1", filename),
Year = gsub(".*(//d{2}).*", "//1", filename),
grp = gsub(".*(ASPH|ID).*", "//1", filename)) %>%
pivot_wider(names_from = "grp", values_from="filename")
res <- vector(mode="list", length=nrow(d))
names(res) <- paste(d$Sem, d$Year, sep="_")
for(i in seq_along(res)){
ASPH <- rio::import(d$ASPH[i])
ID <- rio::import(d$ID[i])
res[[i]] <- bind_rows(ASPH, ID) %>%
distinct(ID, Program, .keep_all = T) %>%
rowwise() %>%
mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== "Y", na.rm=T)) %>%
ungroup() %>%
mutate(racecode= case_when(Citizenship %in% list("NN", "NV") ~ "foreign_national",
`Race- Hispanic`== "Y" ~ "hispanic_latino",
racecount >1 ~ "two_or_more_races",
`Race-Am Ind`== "Y" ~ "american_indian_alaskan_native",
`Race- Asian`== "Y" ~ "asian",
`Race-Afr Amer`== "Y" ~ "black_african_american",
`Race- Hawaiian` == "Y" ~ "native_hawaiian_pacific_islander",
`Race- Caucasian`== "Y" ~ "white",
`Race-Not Rept`== "Y" ~ "race_unknown",
TRUE~ "race_unknown"),
gender_long= case_when(Gender== "F"~ "Female",
Gender== "M"~ "Male",
Gender== "N"~ "Other",
TRUE~ "other"),
DEPT= case_when(Program %in% list("3GPH363AMS", "3GPH363AMSP", "3GPH378AMCD", "3GPH378AMS", "3GPH379APHD")~ "COMD",
Program %in% list("3GPH593AMPH", "3GPH593AMS", "3GPH593APHD", "3GPH569ACGS")~ "ENHS",
Program %in% list("3GPH596AMS", "3GPH596AMSPH", "3GPH596APHD","3GPH594AMPH", "3GPH594AMS", "3GPH594AMSPH", "3GPH594APHD", "3GPH586APBAC")~ "EPID/BIOS",
Program %in% list("3GPH331AMS","3GPH331APHD","3GPH334AMS","3GPH335ADPT", "3GPH377AMS", "3GPH388AMS", "3GPH588AMPH", "3GPHJ331MS", "3UPH331ABS")~ "EXSC",
Program %in% list("3GPH568APBAC","3GPH592ACGS","3GPH592AMPH", "3GPH592APHD", "3GPH576ACGS", "3GPH121ACGS", "3GID635ACGS")~ "HPEB",
Program %in% list("3GPH591AMPH", "3GPH591APHD", "3GPH597AMHA","3GPH591ADPH")~ "HSPM",
TRUE~ "Missing"),
degree_delivery_type= case_when(`First Concentration`== "R999" | `Second Concentration`== "R999" ~ "Distance-based",
`First Concentration`== "3853" | `Second Concentration`== "3853" ~ "Executive",
TRUE~ "Campus-based"),
# FTE_compute= case_when(Level== "GR" & `Course Hours`<9 ~ round(`Course Hours`/9, #digits=2),
# Level== "GR" & `Course Hours`>=9~ 1,
# Level== "UG" & `Course Hours`<12~ round(`Course Hours`/12,
#digits=2),
# Level== "UG" & `Course Hours`>=12 ~ 1),
# Full_Part_Status=case_when((Level== "GR" & `Course Hours` <9)| (Level== "UG" &
#`Course Hours`<12)~"parttime_status",
# (Level=="GR" & `Course Hours`>=9)|(Level== "UG" & `Course
#Hours`>=12)~"fulltime_status",
# TRUE~ "other"),
Sem_Year= paste0(d$Sem[i],"_",d$Year[i]),
StudentCount= 1,
Acad_Year= case_when(Sem_Year %in% list("Fall_18", "Spring_19", "Summer_19")~ "AY2018-19",
Sem_Year %in% list("Fall_19", "Spring_20", "Summer_20")~ "AY2019-20",
Sem_Year %in% list("Fall_20", "Spring_21", "Summer_21")~ "AY2020-21",
Sem_Year %in% list("Fall_21", "Spring_22", "Summer_22")~ "AY2021-22",
Sem_Year %in% list("Fall_22", "Spring_23")~ "AY2022-23"),
Deg_group = case_when(Degree %in% list("DPT", "PHD", "DPH")~ "Doctorate",
Degree %in% list("MSP", "MCD", "MPH", "MHA", "MS","MSPH")~ "Masters",
Degree %in% list("CGS", "PBACC")~ "Certificate")) %>%
left_join(., PhGrad %>% mutate_at(vars(BannerID), ~as.character(.)), by= c("ID"="BannerID", "DEPT"), unmatched= "drop", relationship= "many-to-many") %>%
mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
is.na(Degree.y)== F~ Degree.y,
TRUE~ "Error")) %>%
select(-c(ApplicationID:StudentStatus))
}
答案1
得分: 2
library(dplyr)
data.frame(Sem_Year = c("Fall_21", "Spring_22", "Summer_22",
"Fall_31", "Spring_32", "Summer_32")) %>%
tidyr::separate(Sem_Year, c("Sem","Yr"), convert = TRUE, remove = FALSE) %>%
mutate(AY_end = Yr + if_else(Sem == "Fall", 1, 0),
Acad_Year = paste0("AY20", AY_end - 1, "-", AY_end)) %>%
select(-c(Sem, Yr, AY_end))
Result (Reminder: update in 2099)
Sem_Year Acad_Year
1 Fall_21 AY2021-22
2 Spring_22 AY2021-22
3 Summer_22 AY2021-22
4 Fall_31 AY2031-32
5 Spring_32 AY2031-32
6 Summer_32 AY2031-32
英文:
library(dplyr)
data.frame(Sem_Year = c("Fall_21", "Spring_22", "Summer_22",
"Fall_31", "Spring_32", "Summer_32")) %>%
tidyr::separate(Sem_Year, c("Sem","Yr"), convert = TRUE, remove = FALSE) %>%
mutate(AY_end = Yr + if_else(Sem == "Fall", 1, 0),
Acad_Year = paste0("AY20", AY_end - 1, "-", AY_end)) %>%
select(-c(Sem, Yr, AY_end))
Result (Reminder: update in 2099)
Sem_Year Acad_Year
1 Fall_21 AY2021-22
2 Spring_22 AY2021-22
3 Summer_22 AY2021-22
4 Fall_31 AY2031-32
5 Spring_32 AY2031-32
6 Summer_32 AY2031-32
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论