Apply a function in R on each row: function takes multiple columns from each row and returns multiple new columns

huangapple go评论64阅读模式
英文:

Apply a function in R on each row: function takes multiple columns from each row and returns multiple new columns

问题

这是要翻译的代码部分,不要翻译:

library(tidyverse)

find_sample_gaps <- function(site, analyte, df){
  
  Sample <- df %>%
    filter(site_code == site) %>%
    filter(analyte_code == analyte) %>%
    mutate(Year = as.numeric(format(Date, '%Y')))
  
  x <- Sample %>%
    group_by(Year) %>%
    summarize(n_samples = length(Year))
  
  gaps <- which(c(1,diff(x$Year)) > 1)
  
  a <- sum(x$n_samples) 
  b <- length(unique(Sample$Date))
  c <- 'No gaps'
  
  if(length(gaps) > 0){ 
    c <- paste('There are', as.character(gaps), 'gaps')
  }
  
  return(cbind(a, b, c))
}

# 使用函数将列添加到数据框中
result <- cbind(output1, find_sample_gaps(output1$site_code, output1$analyte_code, output2)) # 报错,因为output2数据框与result的大小不同?

# 另一种尝试,也使用cbind与sapply
result <- cbind(output1, t(sapply(c(output1$site_code, output1$analyte_code, output2), find_sample_gaps))) # 同样报错,无法识别输入到函数中?

以下是代码的翻译部分:

library(tidyverse)

find_sample_gaps <- function(site, analyte, df){
  
  Sample <- df %>%
    filter(site_code == site) %>%
    filter(analyte_code == analyte) %>%
    mutate(Year = as.numeric(format(Date, '%Y')))
  
  x <- Sample %>%
    group_by(Year) %>%
    summarize(n_samples = length(Year))
  
  gaps <- which(c(1,diff(x$Year)) > 1)
  
  a <- sum(x$n_samples) 
  b <- length(unique(Sample$Date))
  c <- 'No gaps'
  
  if(length(gaps) > 0){ 
    c <- paste('There are', as.character(gaps), 'gaps')
  }
  
  return(cbind(a, b, c))
}

# 使用函数将列添加到数据框中
result <- cbind(output1, find_sample_gaps(output1$site_code, output1$analyte_code, output2)) # 报错,因为output2数据框与result的大小不同?

# 另一种尝试,也使用cbind与sapply
result <- cbind(output1, t(sapply(c(output1$site_code, output1$analyte_code, output2), find_sample_gaps))) # 同样报错,无法识别输入到函数中?
英文:

The general idea of this question has been asked here

However the answer did not work in my specific case since I want to use a third input into the function, which is a large dataframe. I have also tried using sapply as per this post but that still does not work.

My goal is to avoid having to create the new columns by hand/use a for loop to append into the new columns. Is this possible in R? Is there another more 'R' way to structure my data and/or function? I looked at purrr::pmap but I don't know how to have it output multiple columns

Here is my minimal reproducible example:

library(tidyverse)

find_sample_gaps&lt;-function(site, analyte, df){
  
  Sample &lt;- df%&gt;%
    filter(site_code == site)%&gt;%filter(analyte_code == analyte)%&gt;%
    mutate(Year = as.numeric(format(Date, &#39;%Y&#39;)))
  
  x&lt;-Sample%&gt;%
    group_by(Year)%&gt;%
    summarize(n_samples = length(Year))
  
  gaps&lt;-which(c(1,diff(x$Year))&gt;1)
  
  a&lt;-sum(x$n_samples) 
  b&lt;-length(unique(Sample$Date))
  c&lt;-&#39;No gaps&#39;
  
  if(length(gaps)&gt;0){ 
    c&lt;-paste(&#39;There are&#39;, as.character(gaps), &#39;gaps&#39;)
  }
  
  return(cbind(a,b,c))
}

# use function inside cbind to add columns to dataframe

result&lt;-cbind(output1, find_sample_gaps(output1$site_code, output1$analyte_code, output2)) # throws error because output2 dataframe isn&#39;t the same size as result?

# another attempt also using cbind with sapply

result&lt;-cbind(output1, t(sapply(c(output1$site_code, output1$analyte_code, output2), find_sample_gaps))) # also throws error, does not recognize the inputs into the function?

Here is my input data:

output1&lt;-structure(list(site_code = c(&quot;a&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;, &quot;f&quot;, &quot;g&quot;, 
&quot;h&quot;, &quot;i&quot;, &quot;j&quot;, &quot;j&quot;, &quot;j&quot;, &quot;j&quot;, &quot;j&quot;, &quot;j&quot;, &quot;j&quot;, &quot;k&quot;, &quot;k&quot;, &quot;k&quot;, &quot;k&quot;, 
&quot;k&quot;, &quot;k&quot;, &quot;k&quot;, &quot;l&quot;, &quot;l&quot;, &quot;l&quot;, &quot;l&quot;, &quot;l&quot;, &quot;l&quot;, &quot;m&quot;, &quot;n&quot;, &quot;o&quot;, &quot;p&quot;, 
&quot;q&quot;, &quot;r&quot;, &quot;s&quot;, &quot;t&quot;, &quot;u&quot;, &quot;v&quot;, &quot;w&quot;, &quot;w&quot;, &quot;w&quot;, &quot;w&quot;, &quot;w&quot;, &quot;x&quot;, &quot;x&quot;, 
&quot;x&quot;, &quot;x&quot;, &quot;x&quot;, &quot;y&quot;, &quot;y&quot;, &quot;y&quot;, &quot;z&quot;, &quot;z&quot;, &quot;z&quot;, &quot;z&quot;, &quot;z&quot;, &quot;aa&quot;, 
&quot;aa&quot;, &quot;aa&quot;, &quot;aa&quot;, &quot;aa&quot;, &quot;aa&quot;, &quot;aa&quot;, &quot;bb&quot;, &quot;bb&quot;, &quot;bb&quot;, &quot;bb&quot;, &quot;bb&quot;, 
&quot;cc&quot;, &quot;cc&quot;, &quot;cc&quot;, &quot;cc&quot;, &quot;cc&quot;, &quot;dd&quot;, &quot;dd&quot;, &quot;dd&quot;, &quot;dd&quot;, &quot;dd&quot;, &quot;ee&quot;, 
&quot;ee&quot;, &quot;ee&quot;, &quot;ee&quot;, &quot;ee&quot;, &quot;ee&quot;, &quot;ee&quot;, &quot;ff&quot;, &quot;ff&quot;, &quot;ff&quot;, &quot;ff&quot;, &quot;ff&quot;, 
&quot;gg&quot;, &quot;gg&quot;, &quot;gg&quot;, &quot;gg&quot;, &quot;gg&quot;, &quot;hh&quot;, &quot;hh&quot;, &quot;hh&quot;, &quot;hh&quot;, &quot;hh&quot;, &quot;hh&quot;, 
&quot;ii&quot;, &quot;ii&quot;, &quot;ii&quot;, &quot;ii&quot;, &quot;ii&quot;, &quot;ii&quot;, &quot;jj&quot;, &quot;jj&quot;, &quot;jj&quot;, &quot;jj&quot;, &quot;jj&quot;, 
&quot;jj&quot;, &quot;jj&quot;), analyte_code = c(&quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, 
&quot;a&quot;, &quot;a&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;, 
&quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, 
&quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;d&quot;, &quot;e&quot;, 
&quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;b&quot;, &quot;c&quot;, 
&quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, 
&quot;f&quot;, &quot;g&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, 
&quot;g&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;c&quot;, &quot;d&quot;, 
&quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, 
&quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;g&quot;)), row.names = c(NA, -115L), class = c(&quot;tbl_df&quot;, 
&quot;tbl&quot;, &quot;data.frame&quot;))
output2&lt;-structure(list(site_code = c(&quot;dd&quot;, &quot;k&quot;, &quot;k&quot;, &quot;r&quot;, &quot;aa&quot;, &quot;ii&quot;, 
&quot;y&quot;, &quot;l&quot;, &quot;l&quot;, &quot;l&quot;, &quot;q&quot;, &quot;cc&quot;, &quot;w&quot;, &quot;bb&quot;, &quot;c&quot;, &quot;ff&quot;, &quot;m&quot;, &quot;ii&quot;, 
&quot;p&quot;, &quot;ff&quot;, &quot;ff&quot;, &quot;z&quot;, &quot;ff&quot;, &quot;l&quot;, &quot;w&quot;, &quot;hh&quot;, &quot;ff&quot;, &quot;ff&quot;, &quot;ff&quot;, 
&quot;k&quot;, &quot;j&quot;, &quot;bb&quot;, &quot;x&quot;, &quot;hh&quot;, &quot;jj&quot;, &quot;z&quot;, &quot;dd&quot;, &quot;q&quot;, &quot;aa&quot;, &quot;k&quot;, &quot;bb&quot;, 
&quot;r&quot;, &quot;e&quot;, &quot;j&quot;, &quot;j&quot;, &quot;ii&quot;, &quot;y&quot;, &quot;hh&quot;, &quot;p&quot;, &quot;p&quot;, &quot;u&quot;, &quot;gg&quot;, &quot;ff&quot;, 
&quot;p&quot;, &quot;cc&quot;, &quot;u&quot;, &quot;dd&quot;, &quot;n&quot;, &quot;bb&quot;, &quot;bb&quot;, &quot;aa&quot;, &quot;ff&quot;, &quot;x&quot;, &quot;k&quot;, 
&quot;w&quot;, &quot;x&quot;, &quot;j&quot;, &quot;bb&quot;, &quot;cc&quot;, &quot;ii&quot;, &quot;hh&quot;, &quot;jj&quot;, &quot;b&quot;, &quot;hh&quot;, &quot;y&quot;, 
&quot;u&quot;, &quot;cc&quot;, &quot;hh&quot;, &quot;aa&quot;, &quot;b&quot;, &quot;jj&quot;, &quot;hh&quot;, &quot;gg&quot;, &quot;y&quot;, &quot;r&quot;, &quot;a&quot;, 
&quot;aa&quot;, &quot;aa&quot;, &quot;z&quot;, &quot;ff&quot;, &quot;ee&quot;, &quot;g&quot;, &quot;hh&quot;, &quot;hh&quot;, &quot;cc&quot;, &quot;hh&quot;, &quot;hh&quot;, 
&quot;h&quot;, &quot;l&quot;, &quot;k&quot;), analyte_code = c(&quot;e&quot;, &quot;b&quot;, &quot;b&quot;, &quot;c&quot;, &quot;f&quot;, &quot;d&quot;, 
&quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;d&quot;, &quot;f&quot;, &quot;c&quot;, &quot;g&quot;, &quot;a&quot;, &quot;a&quot;, &quot;e&quot;, &quot;a&quot;, &quot;e&quot;, &quot;a&quot;, 
NA, &quot;c&quot;, &quot;a&quot;, &quot;d&quot;, &quot;c&quot;, &quot;d&quot;, &quot;b&quot;, &quot;a&quot;, &quot;f&quot;, &quot;a&quot;, &quot;g&quot;, &quot;b&quot;, &quot;c&quot;, 
&quot;f&quot;, &quot;f&quot;, &quot;c&quot;, &quot;a&quot;, &quot;f&quot;, &quot;a&quot;, &quot;e&quot;, &quot;g&quot;, &quot;c&quot;, &quot;a&quot;, &quot;a&quot;, &quot;b&quot;, &quot;e&quot;, 
&quot;a&quot;, &quot;e&quot;, &quot;c&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;a&quot;, &quot;b&quot;, &quot;a&quot;, &quot;e&quot;, &quot;a&quot;, &quot;f&quot;, &quot;a&quot;, 
&quot;a&quot;, &quot;a&quot;, &quot;c&quot;, &quot;e&quot;, &quot;a&quot;, &quot;e&quot;, &quot;a&quot;, &quot;c&quot;, &quot;e&quot;, &quot;c&quot;, &quot;a&quot;, &quot;e&quot;, &quot;c&quot;, 
&quot;a&quot;, &quot;a&quot;, &quot;g&quot;, &quot;c&quot;, &quot;a&quot;, &quot;b&quot;, &quot;b&quot;, &quot;f&quot;, &quot;b&quot;, &quot;e&quot;, &quot;d&quot;, &quot;d&quot;, &quot;c&quot;, 
&quot;c&quot;, &quot;a&quot;, &quot;a&quot;, &quot;b&quot;, &quot;f&quot;, &quot;f&quot;, &quot;b&quot;, &quot;a&quot;, &quot;e&quot;, &quot;g&quot;, &quot;c&quot;, &quot;a&quot;, &quot;a&quot;, 
&quot;a&quot;, &quot;e&quot;, &quot;d&quot;), Date = structure(c(13326, 14287, 14403, 17669, 
16330, 18603, 17428, 15502, 18708, 13780, 17757, 18582, 18087, 
18582, 17433, 13326, 17674, 13668, 18059, 17966, 16701, 17142, 
14915, 16861, 13999, 15502, 15412, 16856, 14551, 18708, 12128, 
14314, 13326, 12563, 13780, 17224, 17611, 15703, 16239, 13780, 
12970, 16096, 16544, 17134, 18603, 13780, 18388, 15684, 19157, 
18684, 17449, 18857, 15075, 18746, 12683, 15618, 17142, 18634, 
15601, 17065, 15926, 12970, 17611, 16692, 13943, 12871, 16958, 
13263, 13451, 16179, 13094, 15044, 18131, 12212, 15966, 16410, 
14775, 13283, 16239, 16391, 17050, 13283, 16085, 16330, 17362, 
18393, 18087, 13724, 14396, 14396, 17331, 19106, 14215, 13388, 
14088, 18241, 18143, 17187, 13486, 12482), class = &quot;Date&quot;)), row.names = c(NA, 
100L), class = &quot;data.frame&quot;)

答案1

得分: 0

以下是翻译好的代码部分:

find_sample_gaps<-function(site, analyte, df){
    
    Sample <- df %>%
        filter(site_code == site) %>%
        filter(analyte_code == analyte) %>%
        mutate(Year = as.numeric(format(Date, '%Y')))
    
    x <- Sample %>%
        group_by(Year) %>%
        summarize(n_samples = length(Year))
    
    gaps <- which(c(1, diff(x$Year)) > 1)
    
    a <- sum(x$n_samples) 
    b <- length(unique(Sample$Date))
    c <- 'No gaps'
    
    if(length(gaps) > 0){ 
        c <- paste('There are', as.character(gaps), 'gaps')
    }
    
    comb <- list(a = a, b = b, c = c)
    
    return(comb)
}

output3 <- output1 %>%
    mutate(a = find_sample_gaps(site, analyte, all_of(output2))$a,
           b = find_sample_gaps(site, analyte, all_of(output2))$b,
           c = find_sample_gaps(site, analyte, all_of(output2))$c) 

output3
   site_code analyte_code     a     b c      
   <chr>     <chr>        <int> <int> <chr>  
 1 a         a                1     1 No gaps
 2 b         a                1     1 No gaps
 3 c         a                1     1 No gaps
 4 d         a                1     1 No gaps
 5 e         a                1     1 No gaps
 6 f         a                1     1 No gaps
 7 g         a                1     1 No gaps
 8 h         a                1     1 No gaps
 9 i         a                1     1 No gaps
10 j         b                1     1 No gaps

希望这能满足你的需要。

英文:

Does this give you what you are after? Note I changed the return of the function to be a list.

find_sample_gaps&lt;-function(site, analyte, df){
Sample &lt;- df%&gt;%
filter(site_code == site)%&gt;%filter(analyte_code == analyte)%&gt;%
mutate(Year = as.numeric(format(Date, &#39;%Y&#39;)))
x&lt;-Sample%&gt;%
group_by(Year)%&gt;%
summarize(n_samples = length(Year))
gaps&lt;-which(c(1,diff(x$Year))&gt;1)
a&lt;-sum(x$n_samples) 
b&lt;-length(unique(Sample$Date))
c&lt;-&#39;No gaps&#39;
if(length(gaps)&gt;0){ 
c&lt;-paste(&#39;There are&#39;, as.character(gaps), &#39;gaps&#39;)
}
comb &lt;- list(a = a, b = b, c = c)
return(comb)
}
output3 &lt;- output1 %&gt;%
mutate(a = find_sample_gaps(site, analyte, all_of(output2))$a,
b = find_sample_gaps(site, analyte, all_of(output2))$b,
c = find_sample_gaps(site, analyte, all_of(output2))$c) 
output3
site_code analyte_code     a     b c      
&lt;chr&gt;     &lt;chr&gt;        &lt;int&gt; &lt;int&gt; &lt;chr&gt;  
1 a         a                1     1 No gaps
2 b         a                1     1 No gaps
3 c         a                1     1 No gaps
4 d         a                1     1 No gaps
5 e         a                1     1 No gaps
6 f         a                1     1 No gaps
7 g         a                1     1 No gaps
8 h         a                1     1 No gaps
9 i         a                1     1 No gaps
10 j         b                1     1 No gaps

huangapple
  • 本文由 发表于 2023年2月6日 06:41:19
  • 转载请务必保留本文链接:https://go.coder-hub.com/75355995.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定