英文:
Rowwise column name of minimum value in certain columns excluding NAs
问题
以下是翻译的代码部分:
library(dplyr)
# Example data: one row per Code, with measurements in Min_0 .. Min_5
# (NA marks a missing measurement; rows "b" and "f" are entirely missing).
Data <- tibble(
  Code = letters[1:6],
  Min_0 = c(12.3, NA, 1.1, NA, 0, NA),
  Min_1 = c(3.6, NA, 12.4, 1.7, 15.6, NA),
  Min_2 = c(45.6, NA, 2.4, 28.9, 32.7, NA),
  Min_3 = c(2.3, NA, NA, NA, NA, NA),
  Min_4 = c(52.4, NA, 23.6, 4.6, 0, NA),
  Min_5 = c(0, NA, 5.7, NA, 0.1, NA)
)
#' Apply a summary function to a vector, returning NA for all-NA input
#'
#' @param x Numeric vector (for example one row's values from `c_across()`).
#' @param f Summary function that accepts `na.rm`, such as `min` or `max`.
#' @param ... Further arguments forwarded to `f`.
#' @return `NA` when every element of `x` is `NA`; otherwise `f` applied to
#'   the non-negative elements of `x` with `na.rm = TRUE`.
Min_Function <- function(x, f, ...) {
  # The earlier version nested a second ifelse() on the very same all-NA test,
  # so its `0` branch could never be reached. A scalar condition only needs a
  # plain if.
  if (all(is.na(x))) {
    return(NA)
  }
  # `x >= 0` drops negative values; comparisons with NA produce NA entries,
  # which `na.rm = TRUE` then discards.
  f(x[x >= 0], na.rm = TRUE, ...)
}
# Row by row, take the minimum over every Min_* column whose name does not
# end in "0"; rows that are entirely NA give NA.
Data %>%
  rowwise() %>%
  mutate(
    Min_Time = Min_Function(
      c_across(starts_with("Min_") & !ends_with("0")),
      min
    )
  )
请注意,我已经删除了HTML实体编码(如<和")以便进行更清晰的翻译。如果您需要任何其他帮助,请随时告诉我。
英文:
I have a dataset that looks like this:
library(dplyr)
# Example data: one row per Code, with measurements in Min_0 .. Min_5
# (NA marks a missing measurement; rows "b" and "f" are entirely missing).
Data <- tibble(
  Code = letters[1:6],
  Min_0 = c(12.3, NA, 1.1, NA, 0, NA),
  Min_1 = c(3.6, NA, 12.4, 1.7, 15.6, NA),
  Min_2 = c(45.6, NA, 2.4, 28.9, 32.7, NA),
  Min_3 = c(2.3, NA, NA, NA, NA, NA),
  Min_4 = c(52.4, NA, 23.6, 4.6, 0, NA),
  Min_5 = c(0, NA, 5.7, NA, 0.1, NA)
)
For every row I want to add a column with the minimum value from columns Min_1 to Min_5 or NA if that row only contains NAs. I would also like to add another column that contains the last digit of the column name that has the minimum value for that row.
I think I have a solution to my first question by adapting an answer from this question (in my case I only want to ignore NAs not NAs and zeros).
#' Apply a summary function to a vector, returning NA for all-NA input
#'
#' @param x Numeric vector (for example one row's values from `c_across()`).
#' @param f Summary function that accepts `na.rm`, such as `min` or `max`.
#' @param ... Further arguments forwarded to `f`.
#' @return `NA` when every element of `x` is `NA`; otherwise `f` applied to
#'   the non-negative elements of `x` with `na.rm = TRUE`.
Min_Function <- function(x, f, ...) {
  # The earlier version nested a second ifelse() on the very same all-NA test,
  # so its `0` branch could never be reached. A scalar condition only needs a
  # plain if.
  if (all(is.na(x))) {
    return(NA)
  }
  # `x >= 0` drops negative values; comparisons with NA produce NA entries,
  # which `na.rm = TRUE` then discards.
  f(x[x >= 0], na.rm = TRUE, ...)
}
# Row by row, take the minimum over every Min_* column whose name does not
# end in "0"; rows that are entirely NA give NA.
Data %>%
  rowwise() %>%
  mutate(
    Min_Time = Min_Function(
      c_across(starts_with("Min_") & !ends_with("0")),
      min
    )
  )
My real data has many more columns which is why I need to use the starts_with and ends_with to select the columns I am interested in.
However, I don't know how I get an output like this for the second part of my question:
# Desired result, written out by hand: Min_ID is the numeric suffix of the
# column that holds each row's minimum (NA for rows that are entirely NA).
mutate(Data, Min_ID = c(5, NA, 2, 1, 4, NA))
答案1
得分: 1
你可以在 mutate 语句中将 case_when() 与 c_across() 函数结合使用,类似这样:
# For each row, compute the minimum over the Min_* columns (excluding names
# ending in "0") and record which of Min_1..Min_5 holds that minimum.
Data <- Data %>%
  rowwise() %>%
  mutate(
    Min_Value = Min_Function(
      c_across(starts_with("Min_") & -ends_with("0")),
      min
    ),
    # Integer literals give every branch the same type as NA_integer_; older
    # dplyr releases refuse to mix double and integer results in case_when().
    # The first branch that is TRUE wins, so ties go to the lowest column.
    # A comparison involving NA is never TRUE, so all-NA rows reach the default.
    Min_ID = case_when(
      Min_Value == Min_1 ~ 1L,
      Min_Value == Min_2 ~ 2L,
      Min_Value == Min_3 ~ 3L,
      Min_Value == Min_4 ~ 4L,
      Min_Value == Min_5 ~ 5L,
      TRUE ~ NA_integer_
    )
  ) %>%
  # Drop the rowwise grouping so later verbs on Data are vectorised again.
  ungroup()
Data
英文:
You can use case_when() together with c_across() inside the mutate() call,
something like this:
# For each row, compute the minimum over the Min_* columns (excluding names
# ending in "0") and record which of Min_1..Min_5 holds that minimum.
Data <- Data %>%
  rowwise() %>%
  mutate(
    Min_Value = Min_Function(
      c_across(starts_with("Min_") & -ends_with("0")),
      min
    ),
    # Integer literals give every branch the same type as NA_integer_; older
    # dplyr releases refuse to mix double and integer results in case_when().
    # The first branch that is TRUE wins, so ties go to the lowest column.
    # A comparison involving NA is never TRUE, so all-NA rows reach the default.
    Min_ID = case_when(
      Min_Value == Min_1 ~ 1L,
      Min_Value == Min_2 ~ 2L,
      Min_Value == Min_3 ~ 3L,
      Min_Value == Min_4 ~ 4L,
      Min_Value == Min_5 ~ 5L,
      TRUE ~ NA_integer_
    )
  ) %>%
  # Drop the rowwise grouping so later verbs on Data are vectorised again.
  ungroup()
Data
答案2
得分: 1
# Position of each row's minimum among the selected Min_* columns, or NA when
# the whole row is NA. The position equals the column-name suffix here only
# because the selected columns are Min_1..Min_5 in that order.
Data %>%
  mutate(
    Min_Time = max.col(
      # NAs become Inf so that, after negation, they can never be the maximum.
      # coalesce() ships with dplyr; replace_na() belongs to tidyr, which this
      # example never loads.
      -across(starts_with("Min_") & -ends_with("0"), ~ coalesce(.x, Inf)),
      # "first" resolves ties deterministically (max.col defaults to "random").
      ties.method = "first"
    ) *
      # NA^TRUE is NA and NA^FALSE is 1, so rows that are all NA are blanked.
      NA^if_all(starts_with("Min_") & -ends_with("0"), is.na)
  )
# A tibble: 6 × 8
Code  Min_0 Min_1 Min_2 Min_3 Min_4 Min_5 Min_Time
1 a      12.3   3.6  45.6   2.3  52.4   0          5
2 b      NA    NA    NA    NA    NA    NA         NA
3 c       1.1  12.4   2.4  NA    23.6   5.7        2
4 d      NA     1.7  28.9  NA     4.6  NA          1
5 e       0    15.6  32.7  NA     0     0.1        4
6 f      NA    NA    NA    NA    NA    NA         NA
英文:
# Position of each row's minimum among the selected Min_* columns, or NA when
# the whole row is NA. The position equals the column-name suffix here only
# because the selected columns are Min_1..Min_5 in that order.
Data %>%
  mutate(
    Min_Time = max.col(
      # NAs become Inf so that, after negation, they can never be the maximum.
      # coalesce() ships with dplyr; replace_na() belongs to tidyr, which this
      # example never loads.
      -across(starts_with("Min_") & -ends_with("0"), ~ coalesce(.x, Inf)),
      # "first" resolves ties deterministically (max.col defaults to "random").
      ties.method = "first"
    ) *
      # NA^TRUE is NA and NA^FALSE is 1, so rows that are all NA are blanked.
      NA^if_all(starts_with("Min_") & -ends_with("0"), is.na)
  )
# A tibble: 6 × 8
  Code  Min_0 Min_1 Min_2 Min_3 Min_4 Min_5 Min_Time
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
1 a      12.3   3.6  45.6   2.3  52.4   0          5
2 b      NA    NA    NA    NA    NA    NA         NA
3 c       1.1  12.4   2.4  NA    23.6   5.7        2
4 d      NA     1.7  28.9  NA     4.6  NA          1
5 e       0    15.6  32.7  NA     0     0.1        4
6 f      NA    NA    NA    NA    NA    NA         NA
答案3
得分: 0
你可以尝试以下使用data.table的解决方案,结合使用melt和dcast:
library(data.table)
# Reshape to long form, tag every row of each group with the suffix of the
# column holding that group's minimum, then reshape back to wide form.
dcast(
    # setDT() converts Data to a data.table by reference, so Data itself is
    # changed. Code and Min_0 are id columns; the remaining columns are melted.
    melt(setDT(Data),
        id.var = c("Code", "Min_0")
    )[
        ,
        # which.min() skips NAs; gsub() removes everything up to the last
        # underscore, leaving the column suffix as text.
        Min_Time := gsub(".*_", "", variable[which.min(value)]),
        .(Code, Min_0)
    ],
    ... ~ variable
)[,
    # Put the original columns back in their original order, then Min_Time.
    c(names(Data), "Min_Time"),
    with = FALSE
]
这将得到如下结果:
   Code Min_0 Min_1 Min_2 Min_3 Min_4 Min_5 Min_Time
1:    a  12.3   3.6  45.6   2.3  52.4   0.0        5
2:    b    NA    NA    NA    NA    NA    NA     <NA>
3:    c   1.1  12.4   2.4    NA  23.6   5.7        2
4:    d    NA   1.7  28.9    NA   4.6    NA        1
5:    e   0.0  15.6  32.7    NA   0.0   0.1        4
6:    f    NA    NA    NA    NA    NA    NA     <NA>
英文:
You can try the following data.table solution with melt + dcast
library(data.table)
# Reshape to long form, tag every row of each group with the suffix of the
# column holding that group's minimum, then reshape back to wide form.
dcast(
    # setDT() converts Data to a data.table by reference, so Data itself is
    # changed. Code and Min_0 are id columns; the remaining columns are melted.
    melt(setDT(Data),
        id.var = c("Code", "Min_0")
    )[
        ,
        # which.min() skips NAs; gsub() removes everything up to the last
        # underscore, leaving the column suffix as text.
        Min_Time := gsub(".*_", "", variable[which.min(value)]),
        .(Code, Min_0)
    ],
    ... ~ variable
)[,
    # Put the original columns back in their original order, then Min_Time.
    c(names(Data), "Min_Time"),
    with = FALSE
]
which gives
   Code Min_0 Min_1 Min_2 Min_3 Min_4 Min_5 Min_Time
1:    a  12.3   3.6  45.6   2.3  52.4   0.0        5
2:    b    NA    NA    NA    NA    NA    NA     <NA>
3:    c   1.1  12.4   2.4    NA  23.6   5.7        2
4:    d    NA   1.7  28.9    NA   4.6    NA        1
5:    e   0.0  15.6  32.7    NA   0.0   0.1        4
6:    f    NA    NA    NA    NA    NA    NA     <NA>
答案4
得分: 0
以下是翻译后的内容:
另外,只需使用`across`和`which`,我们可以获得以下结果
```r
# Index of the first minimum in x, or NA when x holds no non-missing value.
# which.min() skips NAs but returns integer(0) for all-NA input, hence the
# explicit guard.
first_min_index <- function(x) {
  if (all(is.na(x))) NA_integer_ else which.min(x)
}

# c_across() is the rowwise counterpart of across(): it hands the current
# row's Min_1..Min_5 values to the helper as one plain vector.
Data %>%
  rowwise() %>%
  mutate(Min_ID = first_min_index(c_across(Min_1:Min_5)))
```
输出
```
# A tibble: 6 × 8
# Rowwise:
  Code  Min_0 Min_1 Min_2 Min_3 Min_4 Min_5 Min_ID
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <int>
1 a      12.3   3.6  45.6   2.3  52.4   0        5
2 b      NA    NA    NA    NA    NA    NA       NA
3 c       1.1  12.4   2.4  NA    23.6   5.7      2
4 d      NA     1.7  28.9  NA     4.6  NA        1
5 e       0    15.6  32.7  NA     0     0.1      4
6 f      NA    NA    NA    NA    NA    NA       NA
```
英文:
Alternatively with just across and which we can get the below
# Index of the first minimum in x, or NA when x holds no non-missing value.
# which.min() skips NAs but returns integer(0) for all-NA input, hence the
# explicit guard.
first_min_index <- function(x) {
  if (all(is.na(x))) NA_integer_ else which.min(x)
}

# c_across() is the rowwise counterpart of across(): it hands the current
# row's Min_1..Min_5 values to the helper as one plain vector.
Data %>%
  rowwise() %>%
  mutate(Min_ID = first_min_index(c_across(Min_1:Min_5)))
# OUTPUT
# A tibble: 6 × 8
# Rowwise: 
  Code  Min_0 Min_1 Min_2 Min_3 Min_4 Min_5 Min_ID
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <int>
1 a      12.3   3.6  45.6   2.3  52.4   0        5
2 b      NA    NA    NA    NA    NA    NA       NA
3 c       1.1  12.4   2.4  NA    23.6   5.7      2
4 d      NA     1.7  28.9  NA     4.6  NA        1
5 e       0    15.6  32.7  NA     0     0.1      4
6 f      NA    NA    NA    NA    NA    NA       NA
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。


评论