英文:
Is it possible to convert part of this code to improve speed?
问题
I am new here.
Please, I need help converting part of this code to something faster while keeping it compatible with Python programs that use this function. Is it possible to improve its speed?
On my system, a 224x224 color image takes 360 milliseconds to process.
#!/usr/bin/env python3
# coding: utf-8
# original code https://github.com/verhovsky/squircle/blob/master/squircle.py
import cv2
import math
import time
import numpy
# Coordinates whose magnitude is below this threshold are returned unchanged
# by _stretch_square_to_disc (they are treated as lying on an axis).
_epsilon = 0.0000000001
def _sgn(x):
    """Return the sign of ``x`` as a float.

    Returns:
        ``0.0`` when ``x`` equals zero, ``-1.0`` when ``x`` is negative,
        and ``1.0`` otherwise.
    """
    if x == 0.0:
        return 0.0
    if x < 0:
        return -1.0
    return 1.0
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Map a pixel coordinate in ``[0, max_value]`` linearly onto ``[-1, 1]``.

    Args:
        coordinate: Pixel index along one axis.
        max_value: Size of that axis; ``coordinate == max_value`` maps to 1.

    Returns:
        ``coordinate / max_value * 2 - 1``.
    """
    return coordinate / max_value * 2 - 1
def _one_coordinates_to_pixels(coordinate, max_value):
    """Map a unit coordinate in ``[-1, 1]`` linearly onto ``[0, max_value]``.

    Args:
        coordinate: Value in unit space along one axis.
        max_value: Size of that axis in pixels.

    Returns:
        ``(coordinate + 1) / 2 * max_value`` (a float; callers floor it).
    """
    return (coordinate + 1) / 2 * max_value
def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``max(|x|, |y|) / sqrt(x*x + y*y)``.

    Args:
        x: Horizontal coordinate in unit space.
        y: Vertical coordinate in unit space.

    Returns:
        A ``(x', y')`` tuple. When either coordinate has a magnitude below
        ``_epsilon`` the point is returned unchanged, which also avoids a
        division by zero at the origin.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y
    x2 = x * x
    y2 = y * y
    reciprocal_hypotenuse = 1.0 / math.sqrt(x2 + y2)
    # _sgn(v) * v is |v|; pick the coordinate with the larger magnitude.
    if x2 > y2:
        multiplier = _sgn(x) * x * reciprocal_hypotenuse
    else:
        multiplier = _sgn(y) * y * reciprocal_hypotenuse
    return x * multiplier, y * multiplier
def _transform(inp):
    """Remap every pixel of ``inp`` through ``_stretch_square_to_disc``.

    For each destination pixel ``(x, y)`` the indices are converted to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to pixel coordinates and floored; the pixel found at that source index
    is copied into the result.

    Args:
        inp: Image indexed as ``inp[x][y]``.

    Returns:
        A new array created with ``numpy.zeros_like(inp)``. Destination
        pixels whose source index falls outside ``inp`` stay zero.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)
    for x, row in enumerate(inp):
        width = len(row)
        unit_x = _pixel_coordinates_to_unit(x, height)
        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)
            u, v = _stretch_square_to_disc(unit_x, unit_y)
            src_x = math.floor(_one_coordinates_to_pixels(u, height))
            src_y = math.floor(_one_coordinates_to_pixels(v, width))
            # Explicit bounds check instead of silently swallowing IndexError.
            if 0 <= src_x < height and 0 <= src_y < width:
                result[x][y] = inp[src_x][src_y]
    return result
# -- load and test
img = cv2.imread('circle.png')
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError("could not read image 'circle.png'")
# perf_counter is a monotonic, high-resolution clock intended for timing.
elapsed = time.perf_counter()
squareImage = _transform(img[0:224, 0:224])
print(str(round((time.perf_counter() - elapsed) * 1000)) + ' ms to squareImage')
cv2.imshow('square', squareImage)
key = cv2.waitKey(0)
cv2.destroyAllWindows()
I was hoping to convert this code, or part of it, to something faster — maybe CUDA to run directly on the GPU, or Numba, Cython, a library, etc.
英文:
I am new here.
Please, I need help converting part of this code to something faster while keeping it compatible with Python programs that use this function. Is it possible to improve its speed?
On my system, a 224x224 color image takes 360 milliseconds to process.
#!/usr/bin/env python3
# coding: utf-8
# original code https://github.com/verhovsky/squircle/blob/master/squircle.py
import cv2
import math
import time
import numpy
# Coordinates whose magnitude is below this threshold are returned unchanged
# by _stretch_square_to_disc (they are treated as lying on an axis).
_epsilon = 0.0000000001
def _sgn(x):
    """Return the sign of ``x`` as a float.

    Returns:
        ``0.0`` when ``x`` equals zero, ``-1.0`` when ``x`` is negative,
        and ``1.0`` otherwise.
    """
    if x == 0.0:
        return 0.0
    if x < 0:
        return -1.0
    return 1.0
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Map a pixel coordinate in ``[0, max_value]`` linearly onto ``[-1, 1]``.

    Args:
        coordinate: Pixel index along one axis.
        max_value: Size of that axis; ``coordinate == max_value`` maps to 1.

    Returns:
        ``coordinate / max_value * 2 - 1``.
    """
    return coordinate / max_value * 2 - 1
def _one_coordinates_to_pixels(coordinate, max_value):
    """Map a unit coordinate in ``[-1, 1]`` linearly onto ``[0, max_value]``.

    Args:
        coordinate: Value in unit space along one axis.
        max_value: Size of that axis in pixels.

    Returns:
        ``(coordinate + 1) / 2 * max_value`` (a float; callers floor it).
    """
    return (coordinate + 1) / 2 * max_value
def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``max(|x|, |y|) / sqrt(x*x + y*y)``.

    Args:
        x: Horizontal coordinate in unit space.
        y: Vertical coordinate in unit space.

    Returns:
        A ``(x', y')`` tuple. When either coordinate has a magnitude below
        ``_epsilon`` the point is returned unchanged, which also avoids a
        division by zero at the origin.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y
    x2 = x * x
    y2 = y * y
    reciprocal_hypotenuse = 1.0 / math.sqrt(x2 + y2)
    # _sgn(v) * v is |v|; pick the coordinate with the larger magnitude.
    if x2 > y2:
        multiplier = _sgn(x) * x * reciprocal_hypotenuse
    else:
        multiplier = _sgn(y) * y * reciprocal_hypotenuse
    return x * multiplier, y * multiplier
def _transform(inp):
    """Remap every pixel of ``inp`` through ``_stretch_square_to_disc``.

    For each destination pixel ``(x, y)`` the indices are converted to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to pixel coordinates and floored; the pixel found at that source index
    is copied into the result.

    Args:
        inp: Image indexed as ``inp[x][y]``.

    Returns:
        A new array created with ``numpy.zeros_like(inp)``. Destination
        pixels whose source index falls outside ``inp`` stay zero.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)
    for x, row in enumerate(inp):
        width = len(row)
        unit_x = _pixel_coordinates_to_unit(x, height)
        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)
            u, v = _stretch_square_to_disc(unit_x, unit_y)
            src_x = math.floor(_one_coordinates_to_pixels(u, height))
            src_y = math.floor(_one_coordinates_to_pixels(v, width))
            # Explicit bounds check instead of silently swallowing IndexError.
            if 0 <= src_x < height and 0 <= src_y < width:
                result[x][y] = inp[src_x][src_y]
    return result
# -- load and test
img = cv2.imread('circle.png')
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError("could not read image 'circle.png'")
# perf_counter is a monotonic, high-resolution clock intended for timing.
elapsed = time.perf_counter()
squareImage = _transform(img[0:224, 0:224])
print(str(round((time.perf_counter() - elapsed) * 1000)) + ' ms to squareImage')
cv2.imshow('square', squareImage)
key = cv2.waitKey(0)
cv2.destroyAllWindows()
I was hoping to convert this code, or part of it, to something faster — maybe CUDA to run directly on the GPU, or Numba, Cython, a library, etc.
答案1
得分: 2
我使用numba
装饰了这些函数(删除了_sgn
,使用了np.sign
,删除了try..except
- 是否需要?):
import math
import time
import cv2
import numpy
from numba import njit
# Coordinates whose magnitude is below this threshold are returned unchanged
# by _stretch_square_to_disc (they are treated as lying on an axis).
_epsilon = 0.0000000001
@njit
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Map a pixel coordinate in ``[0, max_value]`` linearly onto ``[-1, 1]``.

    Args:
        coordinate: Pixel index along one axis.
        max_value: Size of that axis; ``coordinate == max_value`` maps to 1.

    Returns:
        ``coordinate / max_value * 2 - 1``.
    """
    return coordinate / max_value * 2 - 1
@njit
def _one_coordinates_to_pixels(coordinate, max_value):
    """Map a unit coordinate in ``[-1, 1]`` linearly onto ``[0, max_value]``.

    Args:
        coordinate: Value in unit space along one axis.
        max_value: Size of that axis in pixels.

    Returns:
        ``(coordinate + 1) / 2 * max_value`` (a float; callers floor it).
    """
    return (coordinate + 1) / 2 * max_value
@njit
def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``max(|x|, |y|) / sqrt(x*x + y*y)``.

    Args:
        x: Horizontal coordinate in unit space.
        y: Vertical coordinate in unit space.

    Returns:
        A ``(x', y')`` tuple. When either coordinate has a magnitude below
        ``_epsilon`` the point is returned unchanged, which also avoids a
        division by zero at the origin.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y
    x2 = x * x
    y2 = y * y
    # The module is imported as ``numpy`` (there is no ``np`` alias in scope).
    reciprocal_hypotenuse = 1.0 / numpy.sqrt(x2 + y2)
    # sign(v) * v is |v|; pick the coordinate with the larger magnitude.
    if x2 > y2:
        multiplier = numpy.sign(x) * x * reciprocal_hypotenuse
    else:
        multiplier = numpy.sign(y) * y * reciprocal_hypotenuse
    return x * multiplier, y * multiplier
@njit
def _transform(inp):
    """Remap every pixel of ``inp`` through ``_stretch_square_to_disc``.

    For each destination pixel ``(x, y)`` the indices are converted to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to pixel coordinates and floored; the pixel found at that source index
    is copied into the result.

    Args:
        inp: Image array indexed as ``inp[x][y]``.

    Returns:
        A new array created with ``numpy.zeros_like(inp)`` holding the
        remapped pixels.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)
    for x, row in enumerate(inp):
        width = len(row)
        unit_x = _pixel_coordinates_to_unit(x, height)
        for y, _ in enumerate(row):
            unit_y = _pixel_coordinates_to_unit(y, width)
            u, v = _stretch_square_to_disc(unit_x, unit_y)
            u = _one_coordinates_to_pixels(u, height)
            v = _one_coordinates_to_pixels(v, width)
            # NOTE: the source index is not bounds-checked here.
            result[x][y] = inp[math.floor(u)][math.floor(v)]
    return result
# -- load and test
img = cv2.imread("circle.png")
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError("could not read image 'circle.png'")
# warm jit
# this is needed to let numba do the JIT optimizations
# if you run the function "cold", the running time will be larger
# you can use compile-ahead-of-time
# https://numba.pydata.org/numba-doc/dev/user/pycc.html
squareImage = _transform(img[0:224, 0:224])
elapsed = time.perf_counter_ns()
squareImage = _transform(img[0:224, 0:224])
print(str((time.perf_counter_ns() - elapsed) / 1000) + " us to squareImage")
cv2.imwrite("shashed.png", squareImage)
在我的电脑上(AMD 5700X),它打印出:
528.596 us to squareImage
# without using numba:
# 47928.774 us to squareImage
使用的图像:
circle.png
结果
英文:
I've used numba
to decorate the functions (removed the _sgn
and used np.sign
, removed the try..except
- is it needed?):
import math
import time
import cv2
import numpy
from numba import njit
# Coordinates whose magnitude is below this threshold are returned unchanged
# by _stretch_square_to_disc (they are treated as lying on an axis).
_epsilon = 0.0000000001
@njit
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Map a pixel coordinate in ``[0, max_value]`` linearly onto ``[-1, 1]``.

    Args:
        coordinate: Pixel index along one axis.
        max_value: Size of that axis; ``coordinate == max_value`` maps to 1.

    Returns:
        ``coordinate / max_value * 2 - 1``.
    """
    return coordinate / max_value * 2 - 1
@njit
def _one_coordinates_to_pixels(coordinate, max_value):
    """Map a unit coordinate in ``[-1, 1]`` linearly onto ``[0, max_value]``.

    Args:
        coordinate: Value in unit space along one axis.
        max_value: Size of that axis in pixels.

    Returns:
        ``(coordinate + 1) / 2 * max_value`` (a float; callers floor it).
    """
    return (coordinate + 1) / 2 * max_value
@njit
def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``max(|x|, |y|) / sqrt(x*x + y*y)``.

    Args:
        x: Horizontal coordinate in unit space.
        y: Vertical coordinate in unit space.

    Returns:
        A ``(x', y')`` tuple. When either coordinate has a magnitude below
        ``_epsilon`` the point is returned unchanged, which also avoids a
        division by zero at the origin.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y
    x2 = x * x
    y2 = y * y
    # The module is imported as ``numpy`` (there is no ``np`` alias in scope).
    reciprocal_hypotenuse = 1.0 / numpy.sqrt(x2 + y2)
    # sign(v) * v is |v|; pick the coordinate with the larger magnitude.
    if x2 > y2:
        multiplier = numpy.sign(x) * x * reciprocal_hypotenuse
    else:
        multiplier = numpy.sign(y) * y * reciprocal_hypotenuse
    return x * multiplier, y * multiplier
@njit
def _transform(inp):
    """Remap every pixel of ``inp`` through ``_stretch_square_to_disc``.

    For each destination pixel ``(x, y)`` the indices are converted to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to pixel coordinates and floored; the pixel found at that source index
    is copied into the result.

    Args:
        inp: Image array indexed as ``inp[x][y]``.

    Returns:
        A new array created with ``numpy.zeros_like(inp)`` holding the
        remapped pixels.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)
    for x, row in enumerate(inp):
        width = len(row)
        unit_x = _pixel_coordinates_to_unit(x, height)
        for y, _ in enumerate(row):
            unit_y = _pixel_coordinates_to_unit(y, width)
            u, v = _stretch_square_to_disc(unit_x, unit_y)
            u = _one_coordinates_to_pixels(u, height)
            v = _one_coordinates_to_pixels(v, width)
            # NOTE: the source index is not bounds-checked here.
            result[x][y] = inp[math.floor(u)][math.floor(v)]
    return result
# -- load and test
img = cv2.imread("circle.png")
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    raise FileNotFoundError("could not read image 'circle.png'")
# warm jit
# this is needed to let numba do the JIT optimizations
# if you run the function "cold", the running time will be larger
# you can use compile-ahead-of-time
# https://numba.pydata.org/numba-doc/dev/user/pycc.html
squareImage = _transform(img[0:224, 0:224])
elapsed = time.perf_counter_ns()
squareImage = _transform(img[0:224, 0:224])
print(str((time.perf_counter_ns() - elapsed) / 1000) + " us to squareImage")
cv2.imwrite("shashed.png", squareImage)
On my computer (AMD 5700X) it prints:
528.596 us to squareImage
# without using numba:
# 47928.774 us to squareImage
Images used:
circle.png
the result
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论