将学期分组成学年的概括

huangapple go评论56阅读模式
英文:

Grouping Semesters into Academic Years generalization

问题

我关于我的代码的特定部分有一些疑问。这个循环输入学期文件,计算新的列,并输出带有新变量的数据集。循环运行得很好,但Acad_Year变量是静态的,我正在寻找一种方法,使它更灵活,这样每次有新的数据集时,我就不需要去重新编写case_when语句。示例数据可用。谢谢您的帮助!


require("knitr")
setwd("~/Downloads/Stack Overflow/")

library(dplyr)
library(tidyr)
library(writexl)

PhGrad <- rbind(PhGrad_08, PhGrad_SP_23) %>%
  filter(!BannerID== "")

d <- tibble(
  filename = list.files(), 
  Sem = gsub(".*(Fall|Spring|Summer).*", "//1", filename), 
  Year = gsub(".*(//d{2}).*", "//1", filename), 
  grp = gsub(".*(ASPH|ID).*", "//1", filename)) %>%
  pivot_wider(names_from = "grp", values_from="filename")

res <- vector(mode="list", length=nrow(d))
names(res) <- paste(d$Sem, d$Year, sep="_")

for(i in seq_along(res)){
  ASPH <- rio::import(d$ASPH[i])
  ID <- rio::import(d$ID[i])
  
  res[[i]] <- bind_rows(ASPH, ID) %>%
    distinct(ID, Program, .keep_all = T) %>%
    rowwise() %>%
    mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== "Y", na.rm=T)) %>%
    ungroup() %>%
    mutate(racecode= case_when(Citizenship %in% list("NN", "NV") ~ "foreign_national",
                               `Race- Hispanic`== "Y" ~ "hispanic_latino", 
                                racecount >1 ~ "two_or_more_races",
                               `Race-Am Ind`== "Y"  ~ "american_indian_alaskan_native",
                               `Race- Asian`== "Y"  ~ "asian",
                               `Race-Afr Amer`== "Y"  ~ "black_african_american",
                               `Race- Hawaiian` == "Y"  ~ "native_hawaiian_pacific_islander",
                               `Race- Caucasian`== "Y" ~ "white",
                               `Race-Not Rept`== "Y" ~ "race_unknown",
                               TRUE~ "race_unknown"),
           gender_long= case_when(Gender== "F"~ "Female",
                                  Gender== "M"~ "Male",
                                  Gender== "N"~ "Other",
                                  TRUE~ "other"),
           DEPT= case_when(Program %in% list("3GPH363AMS", "3GPH363AMSP", "3GPH378AMCD", "3GPH378AMS", "3GPH379APHD")~ "COMD",
                           Program %in% list("3GPH593AMPH", "3GPH593AMS", "3GPH593APHD", "3GPH569ACGS")~ "ENHS",
                           Program %in% list("3GPH596AMS", "3GPH596AMSPH", "3GPH596APHD","3GPH594AMPH", "3GPH594AMS", "3GPH594AMSPH", "3GPH594APHD", "3GPH586APBAC")~ "EPID/BIOS", 
                           Program %in% list("3GPH331AMS","3GPH331APHD","3GPH334AMS","3GPH335ADPT", "3GPH377AMS", "3GPH388AMS", "3GPH588AMPH", "3GPHJ331MS", "3UPH331ABS")~ "EXSC",
                           Program %in% list("3GPH568APBAC","3GPH592ACGS","3GPH592AMPH", "3GPH592APHD", "3GPH576ACGS", "3GPH121ACGS", "3GID635ACGS")~ "HPEB",
                           Program %in% list("3GPH591AMPH", "3GPH591APHD", "3GPH597AMHA","3GPH591ADPH")~ "HSPM",
                           TRUE~ "Missing"), 
           degree_delivery_type= case_when(`First Concentration`== "R999" | `Second Concentration`== "R999" ~ "Distance-based",
                                           `First Concentration`== "3853" | `Second Concentration`== "3853" ~ "Executive", 
                                           TRUE~ "Campus-based"),
           Sem_Year= paste0(d$Sem[i],"_",d$Year[i]),
           StudentCount= 1,
      Acad_Year= case_when(Sem_Year %in% list("Fall_18", "Spring_19", "Summer_19")~ "AY2018-19",
                                 Sem_Year %in% list("Fall_19", "Spring_20", "Summer_20")~ "AY2019-20",
                                 Sem_Year %in% list("Fall_20", "Spring_21", "Summer_21")~ "AY2020-21",
                                 Sem_Year %in% list("Fall_21", "Spring_22", "Summer_22")~ "AY2021-22",
                                 Sem_Year %in% list("Fall_22", "Spring_23")~ "AY2022-23"),
      Deg_group = case_when(Degree %in% list("DPT", "PHD", "DPH")~ "Doctorate",
                            Degree %in% list("MSP", "MCD", "MPH", "MHA", "MS","MSPH")~ "Masters",
                            Degree %in% list("CGS", "PBACC")~ "Certificate")) %>% 
    left_join(., PhGrad %>% mutate_at(vars(BannerID), ~as.character(.)), by= c("ID"="BannerID", "DEPT"), unmatched= "drop", relationship= "many-to-many") %>% 
  mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
                     is.na(Degree.y)== F~ Degree.y,
                          TRUE~ "Error")) %>% 
  select(-c(ApplicationID:StudentStatus))
}
英文:

I have about a specific section of my code. The loop inputs semester files, computes new columns and outputs a data set with the new variables. The loop works beautifully, however making the Acad_Year variable is stagnant, I am looking for a way to make it more flexible so that I won't need to go in and re-write the case_when statement every time there is a new dataset. Sample data is available. Thank you in advance!

{r setup}

require(&quot;knitr&quot;)
 setwd(&quot;~/Downloads/Stack Overflow/&quot;)

library(dplyr)
library(tidyr)
library(writexl)

PhGrad &lt;- rbind(PhGrad_08, PhGrad_SP_23) %&gt;% 
  filter(!BannerID== &quot;&quot;)

d &lt;- tibble(
  filename = list.files(), 
  Sem = gsub(&quot;.*(Fall|Spring|Summer).*&quot;, &quot;//1&quot;, filename), 
  Year = gsub(&quot;.*(//d{2}).*&quot;, &quot;//1&quot;, filename), 
  grp = gsub(&quot;.*(ASPH|ID).*&quot;, &quot;//1&quot;, filename)) %&gt;% 
  pivot_wider(names_from = &quot;grp&quot;, values_from=&quot;filename&quot;)

res &lt;- vector(mode=&quot;list&quot;, length=nrow(d))
names(res) &lt;- paste(d$Sem, d$Year, sep=&quot;_&quot;)

for(i in seq_along(res)){
  ASPH &lt;- rio::import(d$ASPH[i])
  ID &lt;- rio::import(d$ID[i])
  
res[[i]] &lt;- bind_rows(ASPH, ID) %&gt;%
    distinct(ID, Program, .keep_all = T) %&gt;% 
    rowwise() %&gt;% 
    mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== &quot;Y&quot;, na.rm=T)) %&gt;% 
    ungroup() %&gt;% 
    mutate(racecode= case_when(Citizenship %in% list(&quot;NN&quot;, &quot;NV&quot;) ~ &quot;foreign_national&quot;,
                               `Race- Hispanic`== &quot;Y&quot; ~ &quot;hispanic_latino&quot;, 
                                racecount &gt;1 ~ &quot;two_or_more_races&quot;,
                               `Race-Am Ind`== &quot;Y&quot;  ~ &quot;american_indian_alaskan_native&quot;,
                               `Race- Asian`== &quot;Y&quot;  ~ &quot;asian&quot;,
                               `Race-Afr Amer`== &quot;Y&quot;  ~ &quot;black_african_american&quot;,
                               `Race- Hawaiian` == &quot;Y&quot;  ~ &quot;native_hawaiian_pacific_islander&quot;,
                               `Race- Caucasian`== &quot;Y&quot; ~ &quot;white&quot;,
                               `Race-Not Rept`== &quot;Y&quot; ~ &quot;race_unknown&quot;,
                               TRUE~ &quot;race_unknown&quot;),
           gender_long= case_when(Gender== &quot;F&quot;~ &quot;Female&quot;,
                                  Gender== &quot;M&quot;~ &quot;Male&quot;,
                                  Gender== &quot;N&quot;~ &quot;Other&quot;,
                                  TRUE~ &quot;other&quot;),
           DEPT= case_when(Program %in% list(&quot;3GPH363AMS&quot;, &quot;3GPH363AMSP&quot;, &quot;3GPH378AMCD&quot;, &quot;3GPH378AMS&quot;, &quot;3GPH379APHD&quot;)~ &quot;COMD&quot;,
                           Program %in% list(&quot;3GPH593AMPH&quot;, &quot;3GPH593AMS&quot;, &quot;3GPH593APHD&quot;, &quot;3GPH569ACGS&quot;)~ &quot;ENHS&quot;,
                           Program %in% list(&quot;3GPH596AMS&quot;, &quot;3GPH596AMSPH&quot;, &quot;3GPH596APHD&quot;,&quot;3GPH594AMPH&quot;, &quot;3GPH594AMS&quot;, &quot;3GPH594AMSPH&quot;, &quot;3GPH594APHD&quot;, &quot;3GPH586APBAC&quot;)~ &quot;EPID/BIOS&quot;, 
                           Program %in% list(&quot;3GPH331AMS&quot;,&quot;3GPH331APHD&quot;,&quot;3GPH334AMS&quot;,&quot;3GPH335ADPT&quot;, &quot;3GPH377AMS&quot;, &quot;3GPH388AMS&quot;, &quot;3GPH588AMPH&quot;, &quot;3GPHJ331MS&quot;, &quot;3UPH331ABS&quot;)~ &quot;EXSC&quot;,
                           Program %in% list(&quot;3GPH568APBAC&quot;,&quot;3GPH592ACGS&quot;,&quot;3GPH592AMPH&quot;, &quot;3GPH592APHD&quot;, &quot;3GPH576ACGS&quot;, &quot;3GPH121ACGS&quot;, &quot;3GID635ACGS&quot;)~ &quot;HPEB&quot;,
                           Program %in% list(&quot;3GPH591AMPH&quot;, &quot;3GPH591APHD&quot;, &quot;3GPH597AMHA&quot;,&quot;3GPH591ADPH&quot;)~ &quot;HSPM&quot;,
                           TRUE~ &quot;Missing&quot;), 
           degree_delivery_type= case_when(`First Concentration`== &quot;R999&quot; | `Second Concentration`== &quot;R999&quot; ~ &quot;Distance-based&quot;,
                                           `First Concentration`== &quot;3853&quot; | `Second Concentration`== &quot;3853&quot; ~ &quot;Executive&quot;, 
                                           TRUE~ &quot;Campus-based&quot;),
 # FTE_compute= case_when(Level== &quot;GR&quot; &amp; `Course Hours`&lt;9 ~ round(`Course Hours`/9, #digits=2),
 #                                  Level== &quot;GR&quot; &amp; `Course Hours`&gt;=9~ 1,
 #                                 Level== &quot;UG&quot; &amp; `Course Hours`&lt;12~ round(`Course Hours`/12, 
 #digits=2),
 #                                  Level== &quot;UG&quot; &amp; `Course Hours`&gt;=12 ~ 1),
 #          Full_Part_Status=case_when((Level== &quot;GR&quot; &amp; `Course Hours` &lt;9)| (Level== &quot;UG&quot; &amp; 
 #`Course Hours`&lt;12)~&quot;parttime_status&quot;,
 #                                      (Level==&quot;GR&quot; &amp; `Course Hours`&gt;=9)|(Level== &quot;UG&quot; &amp; `Course 
 #Hours`&gt;=12)~&quot;fulltime_status&quot;,
 #                                       TRUE~ &quot;other&quot;),
           Sem_Year= paste0(d$Sem[i],&quot;_&quot;,d$Year[i]),
           StudentCount= 1,
      Acad_Year= case_when(Sem_Year %in% list(&quot;Fall_18&quot;, &quot;Spring_19&quot;, &quot;Summer_19&quot;)~ &quot;AY2018-19&quot;,
                                 Sem_Year %in% list(&quot;Fall_19&quot;, &quot;Spring_20&quot;, &quot;Summer_20&quot;)~ &quot;AY2019-20&quot;,
                                 Sem_Year %in% list(&quot;Fall_20&quot;, &quot;Spring_21&quot;, &quot;Summer_21&quot;)~ &quot;AY2020-21&quot;,
                                 Sem_Year %in% list(&quot;Fall_21&quot;, &quot;Spring_22&quot;, &quot;Summer_22&quot;)~ &quot;AY2021-22&quot;,
                                 Sem_Year %in% list(&quot;Fall_22&quot;, &quot;Spring_23&quot;)~ &quot;AY2022-23&quot;),
      Deg_group = case_when(Degree %in% list(&quot;DPT&quot;, &quot;PHD&quot;, &quot;DPH&quot;)~ &quot;Doctorate&quot;,
                            Degree %in% list(&quot;MSP&quot;, &quot;MCD&quot;, &quot;MPH&quot;, &quot;MHA&quot;, &quot;MS&quot;,&quot;MSPH&quot;)~ &quot;Masters&quot;,
                            Degree %in% list(&quot;CGS&quot;, &quot;PBACC&quot;)~ &quot;Certificate&quot;)) %&gt;% 
    left_join(., PhGrad %&gt;% mutate_at(vars(BannerID), ~as.character(.)), by= c(&quot;ID&quot;=&quot;BannerID&quot;, &quot;DEPT&quot;), unmatched= &quot;drop&quot;, relationship= &quot;many-to-many&quot;) %&gt;% 
  mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
                     is.na(Degree.y)== F~ Degree.y,
                          TRUE~ &quot;Error&quot;)) %&gt;% 
  select(-c(ApplicationID:StudentStatus))
}

答案1

得分: 2

library(dplyr)
data.frame(Sem_Year = c("Fall_21", "Spring_22", "Summer_22",
"Fall_31", "Spring_32", "Summer_32")) %>%

tidyr::separate(Sem_Year, c("Sem","Yr"), convert = TRUE, remove = FALSE) %>%
mutate(AY_end = Yr + if_else(Sem == "Fall", 1, 0),
Acad_Year = paste0("AY20", AY_end - 1, "-", AY_end)) %>%
select(-c(Sem, Yr, AY_end))

Result (Reminder: update in 2099)

Sem_Year Acad_Year
1 Fall_21 AY2021-22
2 Spring_22 AY2021-22
3 Summer_22 AY2021-22
4 Fall_31 AY2031-32
5 Spring_32 AY2031-32
6 Summer_32 AY2031-32

英文:
library(dplyr)
data.frame(Sem_Year = c(&quot;Fall_21&quot;, &quot;Spring_22&quot;, &quot;Summer_22&quot;,
                        &quot;Fall_31&quot;, &quot;Spring_32&quot;, &quot;Summer_32&quot;)) %&gt;%
  
  tidyr::separate(Sem_Year, c(&quot;Sem&quot;,&quot;Yr&quot;), convert = TRUE, remove = FALSE) %&gt;%
  mutate(AY_end = Yr + if_else(Sem == &quot;Fall&quot;, 1, 0),
         Acad_Year = paste0(&quot;AY20&quot;, AY_end - 1, &quot;-&quot;, AY_end)) %&gt;%
  select(-c(Sem, Yr, AY_end))

Result (Reminder: update in 2099)

   Sem_Year Acad_Year
1   Fall_21 AY2021-22
2 Spring_22 AY2021-22
3 Summer_22 AY2021-22
4   Fall_31 AY2031-32
5 Spring_32 AY2031-32
6 Summer_32 AY2031-32

huangapple
  • 本文由 发表于 2023年6月2日 02:15:58
  • 转载请务必保留本文链接:https://go.coder-hub.com/76384647.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定