在R程序中,想要使用特定值从向量值填充矩阵的特定列的各行。

huangapple go评论89阅读模式
英文:

In R, I want to fill a matrix row-wise with values from a vector, starting at a specific column in each row

问题

  1. 在 R 程序中,我有一个只包含 0 和 1 的矩阵。见下面:

0 1 0 1 0 0
0 1 1 0 0 0
0 0 0 1 0 1
0 1 1 0 1 0
0 0 0 0 0 1

  1. 我想按行填充矩阵,使用列出的值(c("J" "J" "A" "A" "A" "A" "...一直到矩阵结束")但从每行的第一个1开始。见下面:

0 J J A A A
0 J J A A A
0 0 0 J J A
0 J J A A A
0 0 0 0 0 J

  1. 截至目前,我已经创建了一个值列表和一个确定第一个1所在位置的函数。我不知道如何将其应用于获得我想要的矩阵。
  2. `pattern<- c("A","A","A","A","A")`
  3. `pattern <- c("J","J", rep(pattern, length.out = ncol(Matrix)-2))`
  4. `indices<- apply(Matrix, 1, function(row) min(which(row == 1)))`
英文:

In R, I have a matrix containing 0s and 1s. See below:

  1. 0 1 0 1 0 0
  2. 0 1 1 0 0 0
  3. 0 0 0 1 0 1
  4. 0 1 1 0 1 0
  5. 0 0 0 0 0 1

I want to fill the matrix by row with the values c("J", "J", "A", "A", "A", ...) (continuing with "A" until the end of the row), but beginning at the first 1 in each row. See below:

  1. 0 J J A A A
  2. 0 J J A A A
  3. 0 0 0 J J A
  4. 0 J J A A A
  5. 0 0 0 0 0 J

As of now, I have created a values list and a function to determine where the first 1 is. I'm lost on how to apply this to get the matrix I want.

# Filler label that follows the two leading "J"s.
pattern <- c("A", "A", "A", "A", "A")
# Full label sequence for one row: "J", "J", then "A" recycled to fill the
# remaining ncol(Matrix) - 2 positions.
pattern <- c("J", "J", rep(pattern, length.out = ncol(Matrix) - 2))
# Column index of the first 1 in each row. For a row without any 1,
# min() of an empty vector yields Inf together with a warning.
indices <- apply(Matrix, 1, function(row) min(which(row == 1)))

答案1

得分: 0

获取零的第一部分,然后填充“J”,最后添加“A”。

  1. t(apply(mat, 1, function(x){
  2. res <- which(x == 1)[1] - 1
  3. res <- replace(res, is.na(res), length(x))
  4. c(x[0:res],
  5. rep("J", min(c(2, (length(x) - res)))),
  6. rep("A", max(c(0, (length(x) - res) - 2))))}))
  7. [,1] [,2] [,3] [,4] [,5] [,6]
  8. [1,] "J" "J" "A" "A" "A" "A"
  9. [2,] "0" "J" "J" "A" "A" "A"
  10. [3,] "0" "J" "J" "A" "A" "A"
  11. [4,] "0" "0" "0" "J" "J" "A"
  12. [5,] "0" "J" "J" "A" "A" "A"
  13. [6,] "0" "0" "0" "0" "0" "J"
  14. [7,] "0" "0" "0" "0" "0" "0"

数据

  1. mat <- structure(c(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
  2. 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
  3. 1, 0, 1, 0), dim = 7:6)
英文:

Get the first part of zeros, then fill the "J"s, finally add the "A"s.

  1. t(apply(mat, 1, function(x){
  2. res &lt;- which(x == 1)[1] - 1
  3. res &lt;- replace(res, is.na(res), length(x))
  4. c(x[0:res],
  5. rep(&quot;J&quot;, min(c(2, (length(x) - res)))),
  6. rep(&quot;A&quot;, max(c(0, (length(x) - res) - 2))))}))
  7. [,1] [,2] [,3] [,4] [,5] [,6]
  8. [1,] &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  9. [2,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  10. [3,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  11. [4,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot;
  12. [5,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  13. [6,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot;
  14. [7,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot;

Data

  1. mat &lt;- structure(c(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1,
  2. 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
  3. 1, 0, 1, 0), dim = 7:6)

答案2

得分: 0

  1. vec = c("J", "J", rep("A", ncol(m) - 2))
  2. m |>
  3. apply(MARGIN = 1, \(x) {
  4. x = cumsum(x) > 0
  5. y = rep("0", length(x))
  6. y[x] = vec[seq_len(sum(x))]
  7. y
  8. }) |>
  9. t()
  10. # [,1] [,2] [,3] [,4] [,5] [,6]
  11. # [1,] "0" "J" "J" "A" "A" "A"
  12. # [2,] "0" "J" "J" "A" "A" "A"
  13. # [3,] "0" "0" "0" "J" "J" "A"
  14. # [4,] "0" "J" "J" "A" "A" "A"
  15. # [5,] "0" "0" "0" "0" "0" "J"

Using this sample data:

  1. m = read.table(text = '0 1 0 1 0 0
  2. 0 1 1 0 0 0
  3. 0 0 0 1 0 1
  4. 0 1 1 0 1 0
  5. 0 0 0 0 0 1') |>
  6. as.matrix()
  1. <details>
  2. <summary>英文:</summary>

# Labels in fill order: two "J"s, then "A" for every remaining column.
# max() keeps rep() from receiving a negative count on very narrow matrices.
vec <- c("J", "J", rep("A", max(ncol(m) - 2L, 0L)))
m |>
  apply(MARGIN = 1, \(x) {
    # TRUE from the first non-zero entry of the row onwards.
    x <- cumsum(x) > 0
    # Positions before the first non-zero entry stay "0".
    y <- rep("0", length(x))
    # Fill the flagged positions with the leading labels of `vec`.
    y[x] <- vec[seq_len(sum(x))]
    y
  }) |>
  # apply() returns one column per input row; transpose back to row layout.
  t()

[,1] [,2] [,3] [,4] [,5] [,6]

[1,] "0" "J" "J" "A" "A" "A"

[2,] "0" "J" "J" "A" "A" "A"

[3,] "0" "0" "0" "J" "J" "A"

[4,] "0" "J" "J" "A" "A" "A"

[5,] "0" "0" "0" "0" "0" "J"

  1. ---
  2. Using this sample data:

m = read.table(text = '0 1 0 1 0 0
0 1 1 0 0 0
0 0 0 1 0 1
0 1 1 0 1 0
0 0 0 0 0 1') |> as.matrix()

  1. </details>
  2. # 答案3
  3. **得分**: 0
  # Labels in fill order: two "J"s, then "A" for every remaining column.
  n <- ncol(m)
  vec <- c("J", "J", rep("A", max(n - 2L, 0L)))
  t(apply(m, 1, \(x) {
    # Position of the first non-zero entry; n + 1 when the row has none,
    # so such a row is padded with "0" across its full width.
    first <- match(TRUE, x > 0, nomatch = n + 1L)
    # Left-pad with "0", append the labels, then truncate to the row width.
    c(rep("0", first - 1L), vec)[seq_len(n)]
  }))
  7. [,1] [,2] [,3] [,4] [,5] [,6]
  8. [1,] "0" "J" "J" "A" "A" "A"
  9. [2,] "0" "J" "J" "A" "A" "A"
  10. [3,] "0" "0" "0" "J" "J" "A"
  11. [4,] "0" "J" "J" "A" "A" "A"
  12. [5,] "0" "0" "0" "0" "0" "J"
  13. <details>
  14. <summary>英文:</summary>
  15. feels easier to just do:
  # Labels in fill order: two "J"s, then "A" for every remaining column.
  n <- ncol(m)
  vec <- c("J", "J", rep("A", max(n - 2L, 0L)))
  t(apply(m, 1, \(x) {
    # Position of the first non-zero entry; n + 1 when the row has none,
    # so such a row is padded with "0" across its full width.
    first <- match(TRUE, x > 0, nomatch = n + 1L)
    # Left-pad with "0", append the labels, then truncate to the row width.
    c(rep("0", first - 1L), vec)[seq_len(n)]
  }))
  19. [,1] [,2] [,3] [,4] [,5] [,6]
  20. [1,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  21. [2,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  22. [3,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot;
  23. [4,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  24. [5,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot;
  25. </details>
  26. # 答案4
  27. **得分**: 0
  28. 以下是已翻译的内容:
  29. A few vectorized options:
  30. ```R
  31. m <- max.col(cbind(mat, 1L), "f")
  32. m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
  33. matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
  34. #> [,1] [,2] [,3] [,4] [,5] [,6]
  35. #> [1,] "0" "J" "J" "A" "A" "A"
  36. #> [2,] "0" "J" "J" "A" "A" "A"
  37. #> [3,] "0" "0" "0" "J" "J" "A"
  38. #> [4,] "0" "J" "J" "A" "A" "A"
  39. #> [5,] "0" "0" "0" "0" "0" "J"

或者

  1. m <- max.col(cbind(mat, 1L), "f")
  2. array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
  3. #> [,1] [,2] [,3] [,4] [,5] [,6]
  4. #> [1,] "0" "J" "J" "A" "A" "A"
  5. #> [2,] "0" "J" "J" "A" "A" "A"
  6. #> [3,] "0" "0" "0" "J" "J" "A"
  7. #> [4,] "0" "J" "J" "A" "A" "A"
  8. #> [5,] "0" "0" "0" "0" "0" "J"

或者

  1. matrix(
  2. rep.int(c("0", "J", "A"), c(ncol(mat), 2L, ncol(mat) - 2L))[
  3. sequence(rep(ncol(mat), nrow(mat)), ncol(mat) - max.col(cbind(mat, 1L), "f") + 2L)
  4. ], nrow(mat), ncol(mat), 1
  5. )
  6. #> [,1] [,2] [,3] [,4] [,5] [,6]
  7. #> [1,] "0" "J" "J" "A" "A" "A"
  8. #> [2,] "0" "J" "J" "A" "A" "A"
  9. #> [3,] "0" "0" "0" "J" "J" "A"
  10. #> [4,] "0" "J" "J" "A" "A" "A"
  11. #> [5,] "0" "0" "0" "0" "0" "J"

Benchmarking shows the advantage of using a vectorized approach. Define various approaches as functions:

  1. f1 <- function(mat) {
  2. m <- max.col(cbind(mat, 1L), "f")
  3. m <- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
  4. matrix(c("0", "J", "A")[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
  5. }
  6. f2 <- function(mat) {
  7. m <- max.col(cbind(mat, 1L), "f")
  8. array(c("0", "J", "A")[(col(mat) >= m) + (col(mat) > m + 1L) + 1L], dim(mat))
  9. }
  10. f3 <- function(mat) {
  11. d <- dim(mat)
  12. matrix(
  13. rep.int(c("0", "J", "A"), c(d[2], 2L, d[2] - 2L))[
  14. sequence(rep(d[2], d[1]), d[2] - max.col(cbind(mat, 1L), "f") + 2L)
  15. ], d[1], d[2], 1
  16. )
  17. }
  18. Andre <- function(mat) {
  19. # from Andre Wildberg
  20. t(apply(mat, 1, function(x){
  21. res <- which(x == 1)[1] - 1
  22. res <- replace(res, is.na(res), length(x))
  23. c(x[0:res],
  24. rep("J", min(c(2, (length(x) - res)))),
  25. rep("A", max(c(0, (length(x) - res) - 2))))}))
  26. }

Benchmark on a large-ish matrix.

  1. mat <- matrix(sample(0:1, 1e5, 1, c(0.75, 0.25)), 1e4)
  2. microbenchmark::microbenchmark(
  3. f1 = f1(mat),
  4. f2 = f2(mat),
  5. f3 = f3(mat),
  6. Andre = Andre(mat),
  7. check = "equal"
  8. )
  9. #> Unit: milliseconds
  10. #> expr min lq mean median uq max neval
  11. #> f1 1.391700 1.894001 2.127106 1.967701 2.097001 7.966101 100
  12. #> f2 1.616000 2.240750 2.691387 2.361451 2.590051 7.142301 100
  13. #> f3 1.118401 1.570600 1.745991 1.619800 1.739251 5.924802 100
  14. #> Andre 68.022601 70.696101 73.181934 72.200000 73.931750 117.784401 100
英文:

A few vectorized options:

  1. m &lt;- max.col(cbind(mat, 1L), &quot;f&quot;)
  2. m &lt;- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
  3. matrix(c(&quot;0&quot;, &quot;J&quot;, &quot;A&quot;)[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
  4. #&gt; [,1] [,2] [,3] [,4] [,5] [,6]
  5. #&gt; [1,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  6. #&gt; [2,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  7. #&gt; [3,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot;
  8. #&gt; [4,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  9. #&gt; [5,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot;

Or

  1. m &lt;- max.col(cbind(mat, 1L), &quot;f&quot;)
  2. array(c(&quot;0&quot;, &quot;J&quot;, &quot;A&quot;)[(col(mat) &gt;= m) + (col(mat) &gt; m + 1L) + 1L], dim(mat))
  3. #&gt; [,1] [,2] [,3] [,4] [,5] [,6]
  4. #&gt; [1,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  5. #&gt; [2,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  6. #&gt; [3,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot;
  7. #&gt; [4,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  8. #&gt; [5,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot;

Or

  1. matrix(
  2. rep.int(c(&quot;0&quot;, &quot;J&quot;, &quot;A&quot;), c(ncol(mat), 2L, ncol(mat) - 2L))[
  3. sequence(rep(ncol(mat), nrow(mat)), ncol(mat) - max.col(cbind(mat, 1L), &quot;f&quot;) + 2L)
  4. ], nrow(mat), ncol(mat), 1
  5. )
  6. #&gt; [,1] [,2] [,3] [,4] [,5] [,6]
  7. #&gt; [1,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  8. #&gt; [2,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  9. #&gt; [3,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot;
  10. #&gt; [4,] &quot;0&quot; &quot;J&quot; &quot;J&quot; &quot;A&quot; &quot;A&quot; &quot;A&quot;
  11. #&gt; [5,] &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;0&quot; &quot;J&quot;

Benchmarking shows the advantage of using a vectorized approach. Define various approaches as functions:

  1. f1 &lt;- function(mat) {
  2. m &lt;- max.col(cbind(mat, 1L), &quot;f&quot;)
  3. m &lt;- rbind(m - 1L, pmin(2L, ncol(mat) - m + 1L), pmax(0L, ncol(mat) - m - 1L))
  4. matrix(c(&quot;0&quot;, &quot;J&quot;, &quot;A&quot;)[rep.int(row(m), m)], nrow(mat), ncol(mat), 1)
  5. }
  6. f2 &lt;- function(mat) {
  7. m &lt;- max.col(cbind(mat, 1L), &quot;f&quot;)
  8. array(c(&quot;0&quot;, &quot;J&quot;, &quot;A&quot;)[(col(mat) &gt;= m) + (col(mat) &gt; m + 1L) + 1L], dim(mat))
  9. }
  10. f3 &lt;- function(mat) {
  11. d &lt;- dim(mat)
  12. matrix(
  13. rep.int(c(&quot;0&quot;, &quot;J&quot;, &quot;A&quot;), c(d[2], 2L, d[2] - 2L))[
  14. sequence(rep(d[2], d[1]), d[2] - max.col(cbind(mat, 1L), &quot;f&quot;) + 2L)
  15. ], d[1], d[2], 1
  16. )
  17. }
  18. Andre &lt;- function(mat) {
  19. # from Andre Wildberg
  20. t(apply(mat, 1, function(x){
  21. res &lt;- which(x == 1)[1] - 1
  22. res &lt;- replace(res, is.na(res), length(x))
  23. c(x[0:res],
  24. rep(&quot;J&quot;, min(c(2, (length(x) - res)))),
  25. rep(&quot;A&quot;, max(c(0, (length(x) - res) - 2))))}))
  26. }

Benchmark on a large-ish matrix.

  1. mat &lt;- matrix(sample(0:1, 1e5, 1, c(0.75, 0.25)), 1e4)
  2. microbenchmark::microbenchmark(
  3. f1 = f1(mat),
  4. f2 = f2(mat),
  5. f3 = f3(mat),
  6. Andre = Andre(mat),
  7. check = &quot;equal&quot;
  8. )
  9. #&gt; Unit: milliseconds
  10. #&gt; expr min lq mean median uq max neval
  11. #&gt; f1 1.391700 1.894001 2.127106 1.967701 2.097001 7.966101 100
  12. #&gt; f2 1.616000 2.240750 2.691387 2.361451 2.590051 7.142301 100
  13. #&gt; f3 1.118401 1.570600 1.745991 1.619800 1.739251 5.924802 100
  14. #&gt; Andre 68.022601 70.696101 73.181934 72.200000 73.931750 117.784401 100

huangapple
  • 本文由 发表于 2023年6月8日 00:50:31
  • 转载请务必保留本文链接:https://go.coder-hub.com/76425529.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定