将学期分组成学年的概括

huangapple go评论108阅读模式
英文:

Grouping Semesters into Academic Years generalization

问题

我关于我的代码的特定部分有一些疑问。这个循环输入学期文件,计算新的列,并输出带有新变量的数据集。循环运行得很好,但Acad_Year变量是静态的,我正在寻找一种方法,使它更灵活,这样每次有新的数据集时,我就不需要去重新编写case_when语句。示例数据可用。谢谢您的帮助!

  1. require("knitr")
  2. setwd("~/Downloads/Stack Overflow/")
  1. library(dplyr)
  2. library(tidyr)
  3. library(writexl)
  4. PhGrad <- rbind(PhGrad_08, PhGrad_SP_23) %>%
  5. filter(!BannerID== "")
  6. d <- tibble(
  7. filename = list.files(),
  8. Sem = gsub(".*(Fall|Spring|Summer).*", "//1", filename),
  9. Year = gsub(".*(//d{2}).*", "//1", filename),
  10. grp = gsub(".*(ASPH|ID).*", "//1", filename)) %>%
  11. pivot_wider(names_from = "grp", values_from="filename")
  12. res <- vector(mode="list", length=nrow(d))
  13. names(res) <- paste(d$Sem, d$Year, sep="_")
  14. for(i in seq_along(res)){
  15. ASPH <- rio::import(d$ASPH[i])
  16. ID <- rio::import(d$ID[i])
  17. res[[i]] <- bind_rows(ASPH, ID) %>%
  18. distinct(ID, Program, .keep_all = T) %>%
  19. rowwise() %>%
  20. mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== "Y", na.rm=T)) %>%
  21. ungroup() %>%
  22. mutate(racecode= case_when(Citizenship %in% list("NN", "NV") ~ "foreign_national",
  23. `Race- Hispanic`== "Y" ~ "hispanic_latino",
  24. racecount >1 ~ "two_or_more_races",
  25. `Race-Am Ind`== "Y" ~ "american_indian_alaskan_native",
  26. `Race- Asian`== "Y" ~ "asian",
  27. `Race-Afr Amer`== "Y" ~ "black_african_american",
  28. `Race- Hawaiian` == "Y" ~ "native_hawaiian_pacific_islander",
  29. `Race- Caucasian`== "Y" ~ "white",
  30. `Race-Not Rept`== "Y" ~ "race_unknown",
  31. TRUE~ "race_unknown"),
  32. gender_long= case_when(Gender== "F"~ "Female",
  33. Gender== "M"~ "Male",
  34. Gender== "N"~ "Other",
  35. TRUE~ "other"),
  36. DEPT= case_when(Program %in% list("3GPH363AMS", "3GPH363AMSP", "3GPH378AMCD", "3GPH378AMS", "3GPH379APHD")~ "COMD",
  37. Program %in% list("3GPH593AMPH", "3GPH593AMS", "3GPH593APHD", "3GPH569ACGS")~ "ENHS",
  38. Program %in% list("3GPH596AMS", "3GPH596AMSPH", "3GPH596APHD","3GPH594AMPH", "3GPH594AMS", "3GPH594AMSPH", "3GPH594APHD", "3GPH586APBAC")~ "EPID/BIOS",
  39. Program %in% list("3GPH331AMS","3GPH331APHD","3GPH334AMS","3GPH335ADPT", "3GPH377AMS", "3GPH388AMS", "3GPH588AMPH", "3GPHJ331MS", "3UPH331ABS")~ "EXSC",
  40. Program %in% list("3GPH568APBAC","3GPH592ACGS","3GPH592AMPH", "3GPH592APHD", "3GPH576ACGS", "3GPH121ACGS", "3GID635ACGS")~ "HPEB",
  41. Program %in% list("3GPH591AMPH", "3GPH591APHD", "3GPH597AMHA","3GPH591ADPH")~ "HSPM",
  42. TRUE~ "Missing"),
  43. degree_delivery_type= case_when(`First Concentration`== "R999" | `Second Concentration`== "R999" ~ "Distance-based",
  44. `First Concentration`== "3853" | `Second Concentration`== "3853" ~ "Executive",
  45. TRUE~ "Campus-based"),
  46. Sem_Year= paste0(d$Sem[i],"_",d$Year[i]),
  47. StudentCount= 1,
  48. Acad_Year= case_when(Sem_Year %in% list("Fall_18", "Spring_19", "Summer_19")~ "AY2018-19",
  49. Sem_Year %in% list("Fall_19", "Spring_20", "Summer_20")~ "AY2019-20",
  50. Sem_Year %in% list("Fall_20", "Spring_21", "Summer_21")~ "AY2020-21",
  51. Sem_Year %in% list("Fall_21", "Spring_22", "Summer_22")~ "AY2021-22",
  52. Sem_Year %in% list("Fall_22", "Spring_23")~ "AY2022-23"),
  53. Deg_group = case_when(Degree %in% list("DPT", "PHD", "DPH")~ "Doctorate",
  54. Degree %in% list("MSP", "MCD", "MPH", "MHA", "MS","MSPH")~ "Masters",
  55. Degree %in% list("CGS", "PBACC")~ "Certificate")) %>%
  56. left_join(., PhGrad %>% mutate_at(vars(BannerID), ~as.character(.)), by= c("ID"="BannerID", "DEPT"), unmatched= "drop", relationship= "many-to-many") %>%
  57. mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
  58. is.na(Degree.y)== F~ Degree.y,
  59. TRUE~ "Error")) %>%
  60. select(-c(ApplicationID:StudentStatus))
  61. }
英文:

I have about a specific section of my code. The loop inputs semester files, computes new columns and outputs a data set with the new variables. The loop works beautifully, however making the Acad_Year variable is stagnant, I am looking for a way to make it more flexible so that I won't need to go in and re-write the case_when statement every time there is a new dataset. Sample data is available. Thank you in advance!

  1. {r setup}
  2. require(&quot;knitr&quot;)
  3. setwd(&quot;~/Downloads/Stack Overflow/&quot;)
  1. library(dplyr)
  2. library(tidyr)
  3. library(writexl)
  4. PhGrad &lt;- rbind(PhGrad_08, PhGrad_SP_23) %&gt;%
  5. filter(!BannerID== &quot;&quot;)
  6. d &lt;- tibble(
  7. filename = list.files(),
  8. Sem = gsub(&quot;.*(Fall|Spring|Summer).*&quot;, &quot;//1&quot;, filename),
  9. Year = gsub(&quot;.*(//d{2}).*&quot;, &quot;//1&quot;, filename),
  10. grp = gsub(&quot;.*(ASPH|ID).*&quot;, &quot;//1&quot;, filename)) %&gt;%
  11. pivot_wider(names_from = &quot;grp&quot;, values_from=&quot;filename&quot;)
  12. res &lt;- vector(mode=&quot;list&quot;, length=nrow(d))
  13. names(res) &lt;- paste(d$Sem, d$Year, sep=&quot;_&quot;)
  14. for(i in seq_along(res)){
  15. ASPH &lt;- rio::import(d$ASPH[i])
  16. ID &lt;- rio::import(d$ID[i])
  17. res[[i]] &lt;- bind_rows(ASPH, ID) %&gt;%
  18. distinct(ID, Program, .keep_all = T) %&gt;%
  19. rowwise() %&gt;%
  20. mutate(racecount= sum(c_across(`Race-Am Ind`:`Race- Caucasian`)== &quot;Y&quot;, na.rm=T)) %&gt;%
  21. ungroup() %&gt;%
  22. mutate(racecode= case_when(Citizenship %in% list(&quot;NN&quot;, &quot;NV&quot;) ~ &quot;foreign_national&quot;,
  23. `Race- Hispanic`== &quot;Y&quot; ~ &quot;hispanic_latino&quot;,
  24. racecount &gt;1 ~ &quot;two_or_more_races&quot;,
  25. `Race-Am Ind`== &quot;Y&quot; ~ &quot;american_indian_alaskan_native&quot;,
  26. `Race- Asian`== &quot;Y&quot; ~ &quot;asian&quot;,
  27. `Race-Afr Amer`== &quot;Y&quot; ~ &quot;black_african_american&quot;,
  28. `Race- Hawaiian` == &quot;Y&quot; ~ &quot;native_hawaiian_pacific_islander&quot;,
  29. `Race- Caucasian`== &quot;Y&quot; ~ &quot;white&quot;,
  30. `Race-Not Rept`== &quot;Y&quot; ~ &quot;race_unknown&quot;,
  31. TRUE~ &quot;race_unknown&quot;),
  32. gender_long= case_when(Gender== &quot;F&quot;~ &quot;Female&quot;,
  33. Gender== &quot;M&quot;~ &quot;Male&quot;,
  34. Gender== &quot;N&quot;~ &quot;Other&quot;,
  35. TRUE~ &quot;other&quot;),
  36. DEPT= case_when(Program %in% list(&quot;3GPH363AMS&quot;, &quot;3GPH363AMSP&quot;, &quot;3GPH378AMCD&quot;, &quot;3GPH378AMS&quot;, &quot;3GPH379APHD&quot;)~ &quot;COMD&quot;,
  37. Program %in% list(&quot;3GPH593AMPH&quot;, &quot;3GPH593AMS&quot;, &quot;3GPH593APHD&quot;, &quot;3GPH569ACGS&quot;)~ &quot;ENHS&quot;,
  38. Program %in% list(&quot;3GPH596AMS&quot;, &quot;3GPH596AMSPH&quot;, &quot;3GPH596APHD&quot;,&quot;3GPH594AMPH&quot;, &quot;3GPH594AMS&quot;, &quot;3GPH594AMSPH&quot;, &quot;3GPH594APHD&quot;, &quot;3GPH586APBAC&quot;)~ &quot;EPID/BIOS&quot;,
  39. Program %in% list(&quot;3GPH331AMS&quot;,&quot;3GPH331APHD&quot;,&quot;3GPH334AMS&quot;,&quot;3GPH335ADPT&quot;, &quot;3GPH377AMS&quot;, &quot;3GPH388AMS&quot;, &quot;3GPH588AMPH&quot;, &quot;3GPHJ331MS&quot;, &quot;3UPH331ABS&quot;)~ &quot;EXSC&quot;,
  40. Program %in% list(&quot;3GPH568APBAC&quot;,&quot;3GPH592ACGS&quot;,&quot;3GPH592AMPH&quot;, &quot;3GPH592APHD&quot;, &quot;3GPH576ACGS&quot;, &quot;3GPH121ACGS&quot;, &quot;3GID635ACGS&quot;)~ &quot;HPEB&quot;,
  41. Program %in% list(&quot;3GPH591AMPH&quot;, &quot;3GPH591APHD&quot;, &quot;3GPH597AMHA&quot;,&quot;3GPH591ADPH&quot;)~ &quot;HSPM&quot;,
  42. TRUE~ &quot;Missing&quot;),
  43. degree_delivery_type= case_when(`First Concentration`== &quot;R999&quot; | `Second Concentration`== &quot;R999&quot; ~ &quot;Distance-based&quot;,
  44. `First Concentration`== &quot;3853&quot; | `Second Concentration`== &quot;3853&quot; ~ &quot;Executive&quot;,
  45. TRUE~ &quot;Campus-based&quot;),
  46. # FTE_compute= case_when(Level== &quot;GR&quot; &amp; `Course Hours`&lt;9 ~ round(`Course Hours`/9, #digits=2),
  47. # Level== &quot;GR&quot; &amp; `Course Hours`&gt;=9~ 1,
  48. # Level== &quot;UG&quot; &amp; `Course Hours`&lt;12~ round(`Course Hours`/12,
  49. #digits=2),
  50. # Level== &quot;UG&quot; &amp; `Course Hours`&gt;=12 ~ 1),
  51. # Full_Part_Status=case_when((Level== &quot;GR&quot; &amp; `Course Hours` &lt;9)| (Level== &quot;UG&quot; &amp;
  52. #`Course Hours`&lt;12)~&quot;parttime_status&quot;,
  53. # (Level==&quot;GR&quot; &amp; `Course Hours`&gt;=9)|(Level== &quot;UG&quot; &amp; `Course
  54. #Hours`&gt;=12)~&quot;fulltime_status&quot;,
  55. # TRUE~ &quot;other&quot;),
  56. Sem_Year= paste0(d$Sem[i],&quot;_&quot;,d$Year[i]),
  57. StudentCount= 1,
  58. Acad_Year= case_when(Sem_Year %in% list(&quot;Fall_18&quot;, &quot;Spring_19&quot;, &quot;Summer_19&quot;)~ &quot;AY2018-19&quot;,
  59. Sem_Year %in% list(&quot;Fall_19&quot;, &quot;Spring_20&quot;, &quot;Summer_20&quot;)~ &quot;AY2019-20&quot;,
  60. Sem_Year %in% list(&quot;Fall_20&quot;, &quot;Spring_21&quot;, &quot;Summer_21&quot;)~ &quot;AY2020-21&quot;,
  61. Sem_Year %in% list(&quot;Fall_21&quot;, &quot;Spring_22&quot;, &quot;Summer_22&quot;)~ &quot;AY2021-22&quot;,
  62. Sem_Year %in% list(&quot;Fall_22&quot;, &quot;Spring_23&quot;)~ &quot;AY2022-23&quot;),
  63. Deg_group = case_when(Degree %in% list(&quot;DPT&quot;, &quot;PHD&quot;, &quot;DPH&quot;)~ &quot;Doctorate&quot;,
  64. Degree %in% list(&quot;MSP&quot;, &quot;MCD&quot;, &quot;MPH&quot;, &quot;MHA&quot;, &quot;MS&quot;,&quot;MSPH&quot;)~ &quot;Masters&quot;,
  65. Degree %in% list(&quot;CGS&quot;, &quot;PBACC&quot;)~ &quot;Certificate&quot;)) %&gt;%
  66. left_join(., PhGrad %&gt;% mutate_at(vars(BannerID), ~as.character(.)), by= c(&quot;ID&quot;=&quot;BannerID&quot;, &quot;DEPT&quot;), unmatched= &quot;drop&quot;, relationship= &quot;many-to-many&quot;) %&gt;%
  67. mutate(New_Deg= case_when(is.na(Degree.y)== T~ Degree.x,
  68. is.na(Degree.y)== F~ Degree.y,
  69. TRUE~ &quot;Error&quot;)) %&gt;%
  70. select(-c(ApplicationID:StudentStatus))
  71. }

答案1

得分: 2

library(dplyr)
data.frame(Sem_Year = c("Fall_21", "Spring_22", "Summer_22",
"Fall_31", "Spring_32", "Summer_32")) %>%

tidyr::separate(Sem_Year, c("Sem","Yr"), convert = TRUE, remove = FALSE) %>%
mutate(AY_end = Yr + if_else(Sem == "Fall", 1, 0),
Acad_Year = paste0("AY20", AY_end - 1, "-", AY_end)) %>%
select(-c(Sem, Yr, AY_end))

Result (Reminder: update in 2099)

Sem_Year Acad_Year
1 Fall_21 AY2021-22
2 Spring_22 AY2021-22
3 Summer_22 AY2021-22
4 Fall_31 AY2031-32
5 Spring_32 AY2031-32
6 Summer_32 AY2031-32

英文:
  1. library(dplyr)
  2. data.frame(Sem_Year = c(&quot;Fall_21&quot;, &quot;Spring_22&quot;, &quot;Summer_22&quot;,
  3. &quot;Fall_31&quot;, &quot;Spring_32&quot;, &quot;Summer_32&quot;)) %&gt;%
  4. tidyr::separate(Sem_Year, c(&quot;Sem&quot;,&quot;Yr&quot;), convert = TRUE, remove = FALSE) %&gt;%
  5. mutate(AY_end = Yr + if_else(Sem == &quot;Fall&quot;, 1, 0),
  6. Acad_Year = paste0(&quot;AY20&quot;, AY_end - 1, &quot;-&quot;, AY_end)) %&gt;%
  7. select(-c(Sem, Yr, AY_end))

Result (Reminder: update in 2099)

  1. Sem_Year Acad_Year
  2. 1 Fall_21 AY2021-22
  3. 2 Spring_22 AY2021-22
  4. 3 Summer_22 AY2021-22
  5. 4 Fall_31 AY2031-32
  6. 5 Spring_32 AY2031-32
  7. 6 Summer_32 AY2031-32

huangapple
  • 本文由 发表于 2023年6月2日 02:15:58
  • 转载请务必保留本文链接:https://go.coder-hub.com/76384647.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定