英文:
Randomly rearranging patches of a matrix in R
问题
我正在尝试随机重新排列矩阵中的补丁。这需要针对较大的矩阵和小补丁进行操作,因此使用for循环似乎不是一种实现的选项。假设我有一个数据矩阵如下:
data <- matrix(1:16, nrow = 4)
输出如下所示:
     [,1] [,2] [,3] [,4]
[1,]    1    5    9   13
[2,]    2    6   10   14
[3,]    3    7   11   15
[4,]    4    8   12   16
我现在想选择2x2的补丁并随机重新排列它们,以便输出可能看起来像这样:
     [,1] [,2] [,3] [,4]
[1,]   11   15    3    7
[2,]   12   16    4    8
[3,]    9   13    1    5
[4,]   10   14    2    6
到目前为止,我通过创建一个包含按正确顺序的索引的数字的矩阵,并对其进行重新排列来实现这一点,但是对于每个补丁在新的空矩阵中重新分配在存在数万个补丁时会变得非常耗时。
英文:
I am trying to randomly rearrange patches in a matrix. This will need to be done for larger matrices and small patches, so a for loop does not seem to be an option to achieve this. Let's say I have a data matrix like this:
data <- matrix(1:16, nrow = 4)
The output looks like
     [,1] [,2] [,3] [,4]
[1,]    1    5    9   13
[2,]    2    6   10   14
[3,]    3    7   11   15
[4,]    4    8   12   16
I now want to select 2x2 patches and rearrange them randomly, so that the output may look like this
     [,1] [,2] [,3] [,4]
[1,]   11   15    3    7
[2,]   12   16    4    8
[3,]    9   13    1    5
[4,]   10   14    2    6
I achieved this so far by creating a matrix containing numbers corresponding to the index in correct order, and rearranged, but the reassignment to a new, empty matrix in a for loop for each patch gets quite time consuming when there are tens of thousands of patches.
答案1
得分: 4
以下是翻译好的代码部分:
这是一种你可以使用的方法。我假设子矩阵块可以均匀地划分成矩阵。
data <- matrix(1:16, nrow = 4)
# 矩阵维度
nr <- nrow(data)
nc <- ncol(data)
# 块的宽度和高度
width <- 2L
height <- 2L
# 矩阵维度与块维度的比率
rr <- nr / height
cr <- nc / width
m <- matrix(1L, height, width)
# 创建块索引
(blocks <- matrix(seq(rr * cr), rr, cr) %x% m)
     [,1] [,2] [,3] [,4]
[1,]    1    1    3    3
[2,]    1    1    3    3
[3,]    2    2    4    4
[4,]    2    2    4    4
# 创建随机块索引
set.seed(5)
(r_blocks <- matrix(sample(rr * cr), rr, cr) %x% m)
     [,1] [,2] [,3] [,4]
[1,]    2    2    1    1
[2,]    2    2    1    1
[3,]    3    3    4    4
[4,]    3    3    4    4
# 通过匹配随机块位置与原始位置来创建新矩阵
# 并索引到原始矩阵
matrix(data[ave(r_blocks, r_blocks, FUN = \(v) which(blocks == v[1]))], dim(data))
     [,1] [,2] [,3] [,4]
[1,]    3    7    1    5
[2,]    4    8    2    6
[3,]    9   13   11   15
[4,]   10   14   12   16
英文:
Here is one approach you might use. I'm assuming that the submatrix blocks divide evenly into the matrix.
data <- matrix(1:16, nrow = 4)
# Matrix dimensions
nr <- nrow(data)
nc <- ncol(data)
# Block width and height
width <- 2L
height <- 2L
# Matrix dimension by block dimension ratios
rr <- nr / height
cr <- nc / width
m <- matrix(1L, height, width)
# Create block indices
(blocks <- matrix(seq(rr * cr), rr, cr) %x% m)
     [,1] [,2] [,3] [,4]
[1,]    1    1    3    3
[2,]    1    1    3    3
[3,]    2    2    4    4
[4,]    2    2    4    4
# Create random block indices
set.seed(5)
(r_blocks <- matrix(sample(rr * cr), rr, cr) %x% m)
     [,1] [,2] [,3] [,4]
[1,]    2    2    1    1
[2,]    2    2    1    1
[3,]    3    3    4    4
[4,]    3    3    4    4
# Create new matrix by matching the random block positions against the original positions
# and index against orginal matrix
    
matrix(data[ave(r_blocks, r_blocks, FUN = \(v) which(blocks == v[1]))], dim(data))
     [,1] [,2] [,3] [,4]
[1,]    3    7    1    5
[2,]    4    8    2    6
[3,]    9   13   11   15
[4,]   10   14   12   16
答案2
得分: 3
以下是使用线性索引和嵌套的sequence调用的方法。它可以几乎立即重新排列一个700x700的矩阵。
set.seed(211972494)
rearrange <- function(x, n = 2L) {
  d <- dim(x)
  n2 <- n^2
  x[i[,sample(length(x)/n2)]] <- x[
    i <- matrix(
      sequence(
        rep(n, length(x)/n),
        sequence(
          rep(n, length(x)/n2),
          sequence(
            rep(d[1]/n, d[2]/n),
            seq(1, length(x), n*d[1]),
            n
          ), d[1]
        )
      ), n2
    )
  ]
  x
}
测试:
rearrange(matrix(1:16, 4))
#>      [,1] [,2] [,3] [,4]
#> [1,]   11   15    3    7
#> [2,]   12   16    4    8
#> [3,]    1    5    9   13
#> [4,]    2    6   10   14
rearrange(matrix(1:54, 6), 3L)
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
#> [1,]    4   10   16    1    7   13   19   25   31
#> [2,]    5   11   17    2    8   14   20   26   32
#> [3,]    6   12   18    3    9   15   21   27   33
#> [4,]   22   28   34   37   43   49   40   46   52
#> [5,]   23   29   35   38   44   50   41   47   53
#> [6,]   24   30   36   39   45   51   42   48   54
计时较大的矩阵:
system.time(rearrange(matrix(1:700^2, 700)))
#>    user  system elapsed 
#>    0.02    0.00    0.02
system.time(rearrange(matrix(1:700^2, 700), 7L))
#>    user  system elapsed 
#>       0       0       0
另一个选项使用kronecker。这个方法可能稍微慢一些,但逻辑可能更容易理解。
rearrange2 <- function(x, n = 2L) {
  x[i[,sample(length(x)/n^2)]] <- x[
    i <- matrix(
      order(kronecker(array(seq_along(x), dim(x)/n), matrix(1L, n, n))),
      n^2
    )
  ]
  x
}
x <- matrix(1:700^2, 700)
s <- rep(1:200, each = 2)
i <- 0L
microbenchmark::microbenchmark(
  rearrange = rearrange(x),
  rearrange2 = rearrange2(x),
  check = "equal",
  setup = set.seed(s[i <- i + 1L])
)
#> Unit: milliseconds
#>        expr     min       lq     mean   median       uq     max neval
#>   rearrange 12.4906 12.88795 14.03972 13.13315 13.81755 20.0663   100
#>  rearrange2 33.5341 33.93110 36.86671 34.79870 38.44955 74.1596   100
英文:
Here is an approach using linear indexing with nested sequence calls. It can rearrange a 700x700 matrix almost instantly.
set.seed(211972494)
rearrange <- function(x, n = 2L) {
  d <- dim(x)
  n2 <- n^2
  x[i[,sample(length(x)/n2)]] <- x[
    i <- matrix(
      sequence(
        rep(n, length(x)/n),
        sequence(
          rep(n, length(x)/n2),
          sequence(
            rep(d[1]/n, d[2]/n),
            seq(1, length(x), n*d[1]),
            n
          ), d[1]
        )
      ), n2
    )
  ]
  x
}
Testing:
rearrange(matrix(1:16, 4))
#>      [,1] [,2] [,3] [,4]
#> [1,]   11   15    3    7
#> [2,]   12   16    4    8
#> [3,]    1    5    9   13
#> [4,]    2    6   10   14
rearrange(matrix(1:54, 6), 3L)
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
#> [1,]    4   10   16    1    7   13   19   25   31
#> [2,]    5   11   17    2    8   14   20   26   32
#> [3,]    6   12   18    3    9   15   21   27   33
#> [4,]   22   28   34   37   43   49   40   46   52
#> [5,]   23   29   35   38   44   50   41   47   53
#> [6,]   24   30   36   39   45   51   42   48   54
Timing a larger matrix
system.time(rearrange(matrix(1:700^2, 700)))
#>    user  system elapsed 
#>    0.02    0.00    0.02
system.time(rearrange(matrix(1:700^2, 700), 7L))
#>    user  system elapsed 
#>       0       0       0
Another option using kronecker. It's a little slower, but the logic may be easier to follow.
rearrange2 <- function(x, n = 2L) {
  x[i[,sample(length(x)/n^2)]] <- x[
    i <- matrix(
      order(kronecker(array(seq_along(x), dim(x)/n), matrix(1L, n, n))),
      n^2
    )
  ]
  x
}
x <- matrix(1:700^2, 700)
s <- rep(1:200, each = 2)
i <- 0L
microbenchmark::microbenchmark(
  rearrange = rearrange(x),
  rearrange2 = rearrange2(x),
  check = "equal",
  setup = set.seed(s[i <- i + 1L])
)
#> Unit: milliseconds
#>        expr     min       lq     mean   median       uq     max neval
#>   rearrange 12.4906 12.88795 14.03972 13.13315 13.81755 20.0663   100
#>  rearrange2 33.5341 33.93110 36.86671 34.79870 38.44955 74.1596   100
答案3
得分: 1
以下是您提供的代码的翻译:
# 补丁维度
d1 <- 2
d2 <- 2
# 块矩阵的掩码
msk <- kronecker(
    matrix(seq.int(length(data) / (d1 * d2)), nrow(data) / d1),
    matrix(1, d1, d2)
)
# 洗牌补丁
l <- sample(unname(tapply(data, msk, \(x) matrix(x, d1))))
# 重建矩阵
do.call(
    cbind,
    tapply(
        l,
        ceiling(seq_along(l) / (nrow(data) / d1)),
        \(x) do.call(rbind, x)
    )
)
它可以产生以下结果:
     [,1] [,2] [,3] [,4]
[1,]    9   13    3    7
[2,]   10   14    4    8
[3,]   11   15    1    5
[4,]   12   16    2    6
英文:
You can try the code below
# patch dimensions
d1 <- 2
d2 <- 2
# mask for block matrices
msk <- kronecker(
    matrix(seq.int(length(data) / (d1 * d2)), nrow(data) / d1),
    matrix(1, d1, d2)
)
# shuffle patches
l <- sample(unname(tapply(data, msk, \(x) matrix(x, d1))))
# reconstruct the matrix
do.call(
    cbind,
    tapply(
        l,
        ceiling(seq_along(l) / (nrow(data) / d1)),
        \(x) do.call(rbind, x)
    )
)
and it could produce
     [,1] [,2] [,3] [,4]
[1,]    9   13    3    7
[2,]   10   14    4    8
[3,]   11   15    1    5
[4,]   12   16    2    6
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。


评论