英文:
Get leading element for the lexicographical order
问题
如果M
是一个数值矩阵,我可以通过运行lexsort(M)
来按照词典顺序对其行进行排序,其中
lexorder <- function(M) {
do.call(order, lapply(seq_len(ncol(M)), function(i) M[, i]))
}
lexsort <- function(M) {
M[lexorder(M), ]
}
但我只对获取较大的行感兴趣(排序后矩阵的最后一行)。我们是否可以避免对所有内容进行排序,以更高效地提取最后一行?
英文:
If M
is a numeric matrix, I can order its rows with respect to the lexicographical order by running lexsort(M)
where
lexorder <- function(M) {
do.call(order, lapply(seq_len(ncol(M)), function(i) M[, i]))
}
lexsort <- function(M) {
M[lexorder(M), ]
}
But I'm only interested in getting the greater row (the last one of the ordered matrix). Can we avoid ordering everything to more efficiently extract the last row?
答案1
得分: 3
以下是已翻译的内容:
您可以编写一个更快的递归函数:
lex_max <- function(M,i=1){
d <- dim(M)
if(d[1] == 1 | i > d[2])M[1,]
else lex_max(M[max(M[,i]) == M[,i],,drop = FALSE], i+1)
}
a <- matrix(sample(500, 1e5, TRUE), 10)
计时:
大量列:
microbenchmark::microbenchmark(lex_max(a),
OP=lexsort(a)[nrow(a),],lexmaxrow(a), check = 'equal')
Unit: microseconds
expr min lq mean median uq max neval
lex_max(a) 45.7 57.95 80.442 65.55 87.45 306.4 100
OP 15819.5 19437.25 25579.100 22246.55 29002.80 68948.8 100
lexmaxrow(a) 16393.9 18739.65 25210.846 22022.40 29098.75 47731.1 100
---
a <- matrix(sample(500, 1e5, TRUE), 500)
Unit: microseconds
expr min lq mean median uq max neval
lex_max(a) 5.7 9.90 93.152 12.85 19.70 7124.0 100
OP 577.0 629.75 907.524 699.20 1017.70 7771.4 100
lexmaxrow(a) 470.5 521.05 875.526 619.45 908.85 10049.1 100
---
大量行
a <- matrix(sample(500, 1e5, TRUE), ncol=10)
Unit: microseconds
expr min lq mean median uq max neval
lex_max(a) 60.2 97.5 137.462 120.0 164.40 650.5 100
OP 594.0 775.9 1359.959 966.8 1251.35 14719.9 100
lexmaxrow(a) 475.1 624.1 1013.927 769.5 936.60 11775.1 100
在所有情况下,`lex_max` 函数的性能均快约10倍
### 编辑:
如果您需要位置,可以简单地执行:
which_lexmax <- function(M,i=1, b = seq_len(nrow(M))){
d <- dim(M)
if(d[1] == 1 | i > d[2])b[1]
else lex_max(M[mx <- max(M[,i]) == M[,i],,drop = FALSE], i+1, b[mx])
}
which_lexmax(a)
英文:
You couls write a recursive function that works faster:
lex_max <- function(M,i=1){
d <- dim(M)
if(d[1] == 1 | i > d[2])M[1,]
else lex_max(M[max(M[,i]) == M[,i],,drop = FALSE], i+1)
}
a <- matrix(sample(500, 1e5, TRUE), 10)
The timings:
Large number of columns:
microbenchmark::microbenchmark(lex_max(a),
OP=lexsort(a)[nrow(a),],lexmaxrow(a), check = 'equal')
Unit: microseconds
expr min lq mean median uq max neval
lex_max(a) 45.7 57.95 80.442 65.55 87.45 306.4 100
OP 15819.5 19437.25 25579.100 22246.55 29002.80 68948.8 100
lexmaxrow(a) 16393.9 18739.65 25210.846 22022.40 29098.75 47731.1 100
a <- matrix(sample(500, 1e5, TRUE), 500)
Unit: microseconds
expr min lq mean median uq max neval
lex_max(a) 5.7 9.90 93.152 12.85 19.70 7124.0 100
OP 577.0 629.75 907.524 699.20 1017.70 7771.4 100
lexmaxrow(a) 470.5 521.05 875.526 619.45 908.85 10049.1 100
Large number of rows
a <- matrix(sample(500, 1e5, TRUE), ncol=10)
Unit: microseconds
expr min lq mean median uq max neval
lex_max(a) 60.2 97.5 137.462 120.0 164.40 650.5 100
OP 594.0 775.9 1359.959 966.8 1251.35 14719.9 100
lexmaxrow(a) 475.1 624.1 1013.927 769.5 936.60 11775.1 100
In all the instances the lex_max
function performs >~10x faster
Edit:
If you need the position, you could simply do:
which_lexmax <- function(M,i=1, b = seq_len(nrow(M))){
d <- dim(M)
if(d[1] == 1 | i > d[2])b[1]
else lex_max(M[mx <- max(M[,i]) == M[,i],,drop = FALSE], i+1, b[mx])
}
which_lexmax(a)
答案2
得分: 2
Update
如果您想要得到具有最高词典顺序的行 和 行索引,您可以尝试以下代码
lex_max2 <- function(M) {
i <- 1
nc <- ncol(M)
idx <- 1:nrow(M)
repeat {
p <- max(M[, i]) == M[, i]
idx <- idx
M <- M
if (length(idx) == 1) {
return(list(lexmaxval = M, index = idx))
} else {
i <- i + 1
}
}
}
一个示例是
> M <- rbind(c(1, 2, 3), c(1, 2, 2), c(2, 3, 2), c(2, 2, 3))
> lex_max2(M)
$lexmaxval
[1] 2 3 2
$index
[1] 3
Previous Solution
与 Onyambu的回答 类似,但使用repeat
而不是递归
lex_max2 <- function(M) {
i <- 1
nc <- ncol(M)
repeat {
M <- M[max(M[, i]) == M[, i], ]
if (length(M) == nc) {
return(M)
} else {
i <- i + 1
}
}
}
你可以看到稍微提高了速度
> set.seed(0)
> M1 <- matrix(sample(500, 1e6, TRUE), ncol = 100)
> microbenchmark(
+ lex_max(M1),
+ lex_max2(M1),
+ check = "equal"
+ )
Unit: microseconds
expr min lq mean median uq max neval
lex_max(M1) 67.3 88.10 154.193 90.45 101.30 5700.1 100
lex_max2(M1) 60.0 85.75 94.461 87.80 97.15 158.9 100
> M2 <- matrix(sample(500, 1e7, TRUE), 500)
> microbenchmark(
+ lex_max(M2),
+ lex_max2(M2),
+ check = "equal"
+ )
Unit: microseconds
expr min lq mean median uq max neval
lex_max(M2) 135.9 187.00 232.651 200.25 254.95 597.1 100
lex_max2(M2) 89.6 113.15 152.559 126.20 159.40 528.7 100
英文:
Update
If you want both the row with highest lexicographical order and the row index, you can try the code below
lex_max2 <- function(M) {
i <- 1
nc <- ncol(M)
idx <- 1:nrow(M)
repeat {
p <- max(M[, i]) == M[, i]
idx <- idx
M <- M
if (length(idx) == 1) {
return(list(lexmaxval = M, index = idx))
} else {
i <- i + 1
}
}
}
and an example is
> M <- rbind(c(1, 2, 3), c(1, 2, 2), c(2, 3, 2), c(2, 2, 3))
> lex_max2(M)
$lexmaxval
[1] 2 3 2
$index
[1] 3
Previous Solution
Similar idea to Onyambu's answer but using repeat
rather than recursion
lex_max2 <- function(M) {
i <- 1
nc <- ncol(M)
repeat {
M <- M[max(M[, i]) == M[, i], ]
if (length(M) == nc) {
return(M)
} else {
i <- i + 1
}
}
}
and you can see a bit speed improvement
> set.seed(0)
> M1 <- matrix(sample(500, 1e6, TRUE), ncol = 100)
> microbenchmark(
+ lex_max(M1),
+ lex_max2(M1),
+ check = "equal"
+ )
Unit: microseconds
expr min lq mean median uq max neval
lex_max(M1) 67.3 88.10 154.193 90.45 101.30 5700.1 100
lex_max2(M1) 60.0 85.75 94.461 87.80 97.15 158.9 100
> M2 <- matrix(sample(500, 1e7, TRUE), 500)
> microbenchmark(
+ lex_max(M2),
+ lex_max2(M2),
+ check = "equal"
+ )
Unit: microseconds
expr min lq mean median uq max neval
lex_max(M2) 135.9 187.00 232.651 200.25 254.95 597.1 100
lex_max2(M2) 89.6 113.15 152.559 126.20 159.40 528.7 100
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论