英文:
Find min/max by index in list of lists
问题
给定一个大小为2的列表的列表,我正在尝试找到确定按索引查找最小/最大值的最快方法。目标是确定一系列XY点的边界/范围。
子列表是未排序的(在一个索引上的排序不保证另一个索引已排序)。
目前我正在执行以下操作:
xy = [(x1, y1), (x2, y2), ..., (xn, yn)]
xs, ys = zip(*xy)
xmax = max(xs)
xmin = min(xs)
ymax = max(ys)
ymin = min(ys)
如果没有错误,每个操作的复杂度都是O(n),因此总体复杂度是O(n)。
是否有一种更快的方法适用于任意大小的列表?
英文:
Given a list of lists of size 2, I am trying to find the quickest way to determine the min/max value by index. The goal is to determine the bounds/extent of a series of XY points.
The sublists are unsorted (sorting on one index doesn't guarantee the other is sorted).
Currently I am doing the following:
xy = [(x1, y1), (x2, y2), ..., (xn, yn)]
xs, ys = zip(*xy)
xmax = max(xs)
xmin = min(xs)
ymax = max(ys)
ymin = min(ys)
If not mistaken, each operation is O(n), so overall complexity is O(n).
Is there a faster way for an list of arbitrary size?
答案1
得分: 4
以下是您要翻译的代码部分:
这里有一些替代方法:
def extrema_zip(items):
split0, split1 = zip(*items)
return max(split0), min(split0), max(split1), min(split1)
def extrema_key(items):
return (
max(items, key=lambda x: x[0])[0],
min(items, key=lambda x: x[0])[0],
max(items, key=lambda x: x[1])[1],
min(items, key=lambda x: x[1])[1])
import numpy as np
def extrema_np(items):
arr = np.array(items)
return np.max(arr[:, 0]), np.min(arr[:, 0]), np.max(arr[:, 1]), np.min(arr[:, 1])
import numpy as np
def extrema_npt(items):
arr = np.array(items).transpose()
return np.max(arr[0, :]), np.min(arr[0, :]), np.max(arr[1, :]), np.min(arr[1, :])
def extrema_loop(items):
iter_items = iter(items)
first = next(iter_items)
x_min = x_max = first[0]
y_min = y_max = first[1]
for x, y in iter_items:
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
import numpy as np
import numba as nb
@nb.jit(nopython=True)
def _extrema_loop_nb(arr):
n, m = arr.shape
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x, y = arr[i, :]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
def extrema_loop_nb(items):
arr = np.array(items)
return _extrema_loop_nb(arr)
希望这对您有所帮助。
英文:
Here a couple of alternate methods:
def extrema_zip(items):
split0, split1 = zip(*items)
return max(split0), min(split0), max(split1), min(split1)
def extrema_key(items):
return (
max(items, key=lambda x: x[0])[0],
min(items, key=lambda x: x[0])[0],
max(items, key=lambda x: x[1])[1],
min(items, key=lambda x: x[1])[1])
import numpy as np
def extrema_np(items):
arr = np.array(items)
return np.max(arr[:, 0]), np.min(arr[:, 0]), np.max(arr[:, 1]), np.min(arr[:, 1])
import numpy as np
def extrema_npt(items):
arr = np.array(items).transpose()
return np.max(arr[0, :]), np.min(arr[0, :]), np.max(arr[1, :]), np.min(arr[1, :])
def extrema_loop(items):
iter_items = iter(items)
first = next(iter_items)
x_min = x_max = first[0]
y_min = y_max = first[1]
for x, y in iter_items:
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
import numpy as np
import numba as nb
@nb.jit(nopython=True)
def _extrema_loop_nb(arr):
n, m = arr.shape
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x, y = arr[i, :]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
def extrema_loop_nb(items):
arr = np.array(items)
return _extrema_loop_nb(arr)
and their respective timings as a function of input size:
which shows that actually direct looping is beneficial for your use-case.
(full benchmarks available here)
See here for similar approaches working on NumPy array inputs.
答案2
得分: 2
If you are willing to start from a NumPy array, you could adapt the solutions from here, omitting the senseless ones like those using zip()
or the key
parameter from min()
/max()
), and adding a couple more:
def extrema_py(arr):
return max(arr[:, 0]), min(arr[:, 0]), max(arr[:, 1]), min(arr[:, 1])
import numpy as np
def extrema_np(arr):
return np.max(arr[:, 0]), np.min(arr[:, 0]), np.max(arr[:, 1]), np.min(arr[:, 1])
import numpy as np
def extrema_npt(arr):
arr = arr.transpose()
return np.max(arr[0, :]), np.min(arr[0, :]), np.max(arr[1, :]), np.min(arr[1, :])
import numpy as np
def extrema_npa(arr):
x_max, y_max = np.max(arr, axis=0)
x_min, y_min = np.min(arr, axis=0)
return x_max, x_min, y_max, y_min
import numpy as np
def extrema_npat(arr):
arr = arr.transpose()
x_max, y_max = np.max(arr, axis=1)
x_min, y_min = np.min(arr, axis=1)
return x_max, x_min, y_max, y_min
def extrema_loop(arr):
n, m = arr.shape
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x, y = arr[i, :]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
import numba as nb
@nb.jit(nopython=True)
def extrema_loop_nb(arr):
n, m = arr.shape
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x = arr[i, 0]
y = arr[i, 1]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
%%cython -c-O3 -c-march=native -a
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True, infer_types=True
import numpy as np
import cython as cy
cdef void _extrema_loop_cy(
long[:, :] arr,
size_t n,
size_t m,
long[:, :] result):
cdef size_t i, j
cdef long x, y, x_max, x_min, y_max, y_min
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x = arr[i, 0]
y = arr[i, 1]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
result[0, 0] = x_max
result[0, 1] = x_min
result[1, 0] = y_max
result[1, 1] = y_min
def extrema_loop_cy(arr):
n, m = arr.shape
result = np.zeros((2, m), dtype=arr.dtype)
_extrema_loop_cy(arr, n, m, result)
return result[0, 0], result[0, 1], result[1, 0], result[1, 1]
And their respective timings as a function of input size:
So, for NumPy array inputs, one can get much faster timings.
The Numba- and Cython-based solution seems to be the fastest, remarkably outperforming the fastest NumPy-only approaches.
(full benchmarks available here)
(EDITED to improve Numba-based solution)
英文:
If you are willing to start from a NumPy array, you could adapt the solutions from here, omitting the senseless ones like those using zip()
or the key
parameter from min()
/max()
), and adding a couple more:
def extrema_py(arr):
return max(arr[:, 0]), min(arr[:, 0]), max(arr[:, 1]), min(arr[:, 1])
import numpy as np
def extrema_np(arr):
return np.max(arr[:, 0]), np.min(arr[:, 0]), np.max(arr[:, 1]), np.min(arr[:, 1])
import numpy as np
def extrema_npt(arr):
arr = arr.transpose()
return np.max(arr[0, :]), np.min(arr[0, :]), np.max(arr[1, :]), np.min(arr[1, :])
import numpy as np
def extrema_npa(arr):
x_max, y_max = np.max(arr, axis=0)
x_min, y_min = np.min(arr, axis=0)
return x_max, x_min, y_max, y_min
import numpy as np
def extrema_npat(arr):
arr = arr.transpose()
x_max, y_max = np.max(arr, axis=1)
x_min, y_min = np.min(arr, axis=1)
return x_max, x_min, y_max, y_min
def extrema_loop(arr):
n, m = arr.shape
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x, y = arr[i, :]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
import numba as nb
@nb.jit(nopython=True)
def extrema_loop_nb(arr):
n, m = arr.shape
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x = arr[i, 0]
y = arr[i, 1]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
return x_max, x_min, y_max, y_min
%%cython -c-O3 -c-march=native -a
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True, infer_types=True
import numpy as np
import cython as cy
cdef void _extrema_loop_cy(
long[:, :] arr,
size_t n,
size_t m,
long[:, :] result):
cdef size_t i, j
cdef long x, y, x_max, x_min, y_max, y_min
x_min = x_max = arr[0, 0]
y_min = y_max = arr[0, 1]
for i in range(1, n):
x = arr[i, 0]
y = arr[i, 1]
if x > x_max:
x_max = x
elif x < x_min:
x_min = x
if y > y_max:
y_max = y
elif y < y_min:
y_min = y
result[0, 0] = x_max
result[0, 1] = x_min
result[1, 0] = y_max
result[1, 1] = y_min
def extrema_loop_cy(arr):
n, m = arr.shape
result = np.zeros((2, m), dtype=arr.dtype)
_extrema_loop_cy(arr, n, m, result)
return result[0, 0], result[0, 1], result[1, 0], result[1, 1]
and their respective timings as a function of input size:
So, for NumPy array inputs, one can gets much faster timings.
The Numba- and Cython- based solution seems to be the fastest, remarkably outperforming the fastest NumPy-only approaches.
(full benchmarks available here)
(EDITED to improve Numba-based solution)
答案3
得分: 0
对于任意大小的列表,如果你想找到min
和max
,你必须查看每个元素,这是O(N),这是无法改变的。
话虽如此,显然,你可以将列表分成多个块并将它们提供给不同的进程,这将取决于你拥有的处理器核心数量,从而加快速度。
英文:
For the list of ANY size, if you're trying to find min
and max
, you have to take a look at EVERY element, which is O(N), and there's absolutely nothing we can do about that.
Having said that, you may, obviously, split the list into the chunks and feed them to the different processes, which will make it faster, depending on the processor cores you have.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论