更快地重复初始化 R Matrix::sparseMatrix

huangapple go评论76阅读模式
英文:

Faster repeated initialization of R Matrix::sparseMatrix

问题

我正在使用R语言处理稀疏矩阵。这些矩阵是方阵,维度接近100万。我需要在代码中重复初始化这些矩阵,而这部分是一个瓶颈。出于各种原因,我使用了i-j-x的表示方法。首先,这些矩阵是块稀疏的,块之间存在冗余结构,使用i-j表示法复制块似乎更容易。此外,我处理的一些矩阵是矩阵的和,仅传递冗余的行索引和列索引比对两个矩阵求和更高效。关于如何构建矩阵,你有什么建议或不建议吗?

编辑:应该像这样:

n_blocks = 1000 # 块的数量
n_within_block = 1000 # 每个块的维度
n_nonnull_within_block = 40000 # 每个块中非零系数的数量
i_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # 一个块中的行索引
j_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # 一个块中的列索引
x_within_block = rnorm(runif(n_nonnull_within_block))  # 一个块中的系数
# 大多数步骤可以预先计算并重复使用
i = outer( # 行索引
  (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
  i_within_block, 
  "+"
)
j = outer( # 列索引
  (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
  j_within_block, 
  "+"
)
x = rep(x_within_block, n_blocks) # 系数的值

# 优化步骤(希望能提高效率)
t1 = Sys.time()
M = Matrix::sparseMatrix(
  i = i, j = j, x = x 
)
Sys.time()-t1
# 在各个地方使用小的维度!
# Matrix::image(M)
英文:

I am working with sparse matrices in R. They are square, with dimensions neighboring 1 million. I have to initialize those matrices repeatedly in my code, and this part is a bottleneck. I am using the i-j-x formulation, for various reasons. First, the matrices are block-sparse, with redundant structures across blocks, and it seems infinitely easier to replicate the blocks using the i-j formulation. Also, some of the matrices I work with are sums of matrices, and it is much more efficient just to pass redundant is and js than to sum two matrices. Any tips about how I should or should not build my matrices ?

Edit: should look like that


n_blocks = 1000 # number of blocks
n_within_block = 1000 # dimension of each block
n_nonnull_within_block = 40000 # number of non null coefficients within a block
i_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block = rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used
i = outer( # row idx
  (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
  i_within_block, 
  "+"
)
j = outer( # col idx
  (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
  j_within_block, 
  "+"
)
x =rep(x_within_block, n_blocks) # value of coefficients

# step to optimize (hopefully)
t1 = Sys.time()
M = Matrix::sparseMatrix(
  i = i, j = j, x = x 
)
Sys.time()-t1
# put small dimensions everywhere!
#Matrix::image(M)


答案1

得分: 2

演示了@MikaelJagan的建议,使用整数而不是双精度,并直接构建dgTMatrix

基准计时:

library(Matrix)

set.seed(1)
n_blocks = 1000 # number of blocks
n_within_block = 1000 # dimension of each block
n_nonnull_within_block = 40000 # number of non null coefficients within a block
i_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block = rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used

system.time({
  i <- outer( # row idx
    (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
    i_within_block, 
    "+"
  )
  
  j <- outer( # col idx
    (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
    j_within_block, 
    "+"
  )
  
  x <- rep(x_within_block, n_blocks) # value of coefficients
  
  # step to optimize (hopefully)
  M1 <- sparseMatrix(i = i, j = j, x = x, dims = rep(n_blocks*n_within_block, 2))
})
#>    user  system elapsed 
#>    3.28    0.64    3.92

使用整数而不是双精度:

set.seed(1)
n_blocks <- 1000L # number of blocks
n_within_block <- 1000L # dimension of each block
n_nonnull_within_block <- 40000L # number of non null coefficients within a block
i_within_block <- as.integer(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block <- as.integer(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block <- rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used

system.time({
  i <- outer( # row idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    i_within_block, 
    "+"
  )
  j <- outer( # col idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    j_within_block, 
    "+"
  )
  
  x <- rep(x_within_block, n_blocks) # value of coefficients
  
  M2 <- sparseMatrix(
    i = i, j = j, x = x,
    dims = rep(n_blocks*n_within_block, 2), index1 = FALSE
  )
})
#>    user  system elapsed 
#>    2.41    0.47    2.87

直接构建dgTMatrix

set.seed(1)
n_blocks <- 1000L # number of blocks
n_within_block <- 1000L # dimension of each block
n_nonnull_within_block <- 40000L # number of non null coefficients within a block
i_within_block <- as.integer(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block <- as.integer(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block <- rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used

system.time({
  M3 <- new("dgTMatrix")
  M3@Dim <- rep(n_blocks*n_within_block, 2)
  
  M3@i <- c(outer( # row idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    i_within_block, 
    "+"
  ))
  
  M3@j <- c(outer( # col idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    j_within_block, 
    "+"
  ))
  
  M3@x <- rep(x_within_block, n_blocks) # value of coefficients
})
#>    user  system elapsed 
#>    0.66    0.22    0.87

dgTMatrix转换为CsparseMatrix

system.time(M4 <- as(M3, "CsparseMatrix"))
#>    user  system elapsed 
#>    1.47    0.20    1.68
identical(M1, M2)
#> [1] TRUE
identical(M1, M4)
#> [1] TRUE

请注意,dgTMatrix在内存中比CsparseMatrix占用更多空间:

object.size(M1)
#> 474641504 bytes
object.size(M3)
#> 640001496 bytes
英文:

Demonstrating @MikaelJagan's suggestions of using integers instead of doubles and building a dgTMatrix directly:

Baseline timing:

library(Matrix)

set.seed(1)
n_blocks = 1000 # number of blocks
n_within_block = 1000 # dimension of each block
n_nonnull_within_block = 40000 # number of non null coefficients within a block
i_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block = ceiling(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block = rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used

system.time({
  i &lt;- outer( # row idx
    (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
    i_within_block, 
    &quot;+&quot;
  )
  
  j &lt;- outer( # col idx
    (ceiling(n_blocks*runif(n_blocks))-1)*n_within_block, 
    j_within_block, 
    &quot;+&quot;
  )
  
  x &lt;- rep(x_within_block, n_blocks) # value of coefficients
  
  # step to optimize (hopefully)
  M1 &lt;- sparseMatrix(i = i, j = j, x = x, dims = rep(n_blocks*n_within_block, 2))
})
#&gt;    user  system elapsed 
#&gt;    3.28    0.64    3.92

Using integers instead of doubles:

set.seed(1)
n_blocks &lt;- 1000L # number of blocks
n_within_block &lt;- 1000L # dimension of each block
n_nonnull_within_block &lt;- 40000L # number of non null coefficients within a block
i_within_block &lt;- as.integer(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block &lt;- as.integer(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block &lt;- rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used

system.time({
  i &lt;- outer( # row idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    i_within_block, 
    &quot;+&quot;
  )
  j &lt;- outer( # col idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    j_within_block, 
    &quot;+&quot;
  )
  
  x &lt;- rep(x_within_block, n_blocks) # value of coefficients
  
  M2 &lt;- sparseMatrix(
    i = i, j = j, x = x,
    dims = rep(n_blocks*n_within_block, 2), index1 = FALSE
  )
})
#&gt;    user  system elapsed 
#&gt;    2.41    0.47    2.87

Building a dgTMatrix directly:

set.seed(1)
n_blocks &lt;- 1000L # number of blocks
n_within_block &lt;- 1000L # dimension of each block
n_nonnull_within_block &lt;- 40000L # number of non null coefficients within a block
i_within_block &lt;- as.integer(n_within_block*runif(n_nonnull_within_block)) # row idx in one block
j_within_block &lt;- as.integer(n_within_block*runif(n_nonnull_within_block)) # col idx in one block
x_within_block &lt;- rnorm(runif(n_nonnull_within_block))  # coeff in one block
# most of those steps can be pre-computed once and re-used

system.time({
  M3 &lt;- new(&quot;dgTMatrix&quot;)
  M3@Dim &lt;- rep(n_blocks*n_within_block, 2)
  
  M3@i &lt;- c(outer( # row idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    i_within_block, 
    &quot;+&quot;
  ))
  
  M3@j &lt;- c(outer( # col idx
    as.integer(n_blocks*runif(n_blocks))*n_within_block, 
    j_within_block, 
    &quot;+&quot;
  ))
  
  M3@x &lt;- rep(x_within_block, n_blocks) # value of coefficients
})
#&gt;    user  system elapsed 
#&gt;    0.66    0.22    0.87

Converting a dgTMatrix to a CsparseMatrix:

system.time(M4 &lt;- as(M3, &quot;CsparseMatrix&quot;))
#&gt;    user  system elapsed 
#&gt;    1.47    0.20    1.68
identical(M1, M2)
#&gt; [1] TRUE
identical(M1, M4)
#&gt; [1] TRUE

Note that the dgTMatrix is larger in memory than the CsparseMatrix:

object.size(M1)
#&gt; 474641504 bytes
object.size(M3)
#&gt; 640001496 bytes

huangapple
  • 本文由 发表于 2023年8月9日 17:42:30
  • 转载请务必保留本文链接:https://go.coder-hub.com/76866469.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定