是的,可以将这段代码的一部分转换以提高速度吗?

huangapple go评论71阅读模式
英文:

Is it possible to convert part of this code to improve speed?

问题

I am new here,

Please, I need help converting part of this code into something faster while keeping it compatible with the Python programs that use this function. Is it possible to improve its speed?

On my system, a 224x224 color image takes 360 milliseconds to process.

#!/usr/bin/env python3
# coding: utf-8
# original code https://github.com/verhovsky/squircle/blob/master/squircle.py

import cv2
import math
import time
import numpy

_epsilon = 0.0000000001

def _sgn(x):
    if x == 0.0:
        return 0.0
    if x < 0:
        return -1.0
    return 1.0

def _pixel_coordinates_to_unit(coordinate, max_value):
    return coordinate / max_value * 2 - 1

def _one_coordinates_to_pixels(coordinate, max_value):
    return (coordinate + 1) / 2 * max_value

def _stretch_square_to_disc(x, y):
    """Scale a unit-square point radially so the square maps onto the disc.

    A point with ``|x|`` or ``|y|`` below ``_epsilon`` is returned unchanged.
    Any other point is multiplied by ``|d| / sqrt(x*x + y*y)``, where ``d``
    is ``x`` when ``x*x > y*y`` and ``y`` otherwise.

    Returns:
        The transformed ``(x, y)`` pair.
    """
    on_an_axis = abs(x) < _epsilon or abs(y) < _epsilon
    if on_an_axis:
        return x, y

    x_squared = x * x
    y_squared = y * y
    inverse_length = 1.0 / math.sqrt(x * x + y * y)

    # The coordinate with the larger magnitude decides the scale factor.
    dominant = x if x_squared > y_squared else y
    scale = _sgn(dominant) * dominant * inverse_length

    return x * scale, y * scale

def _transform(inp):
    """Resample ``inp`` through the square-to-disc coordinate mapping.

    For every destination pixel ``(x, y)`` the position is normalised to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to a pixel position, and the value found there in ``inp`` is copied.

    Args:
        inp: Array indexed as ``inp[x][y]`` (an image in the calling script).

    Returns:
        A new array shaped like ``inp``. A destination pixel whose source
        position cannot be indexed keeps the zero fill value.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)  # loop-invariant, so computed once

    for x, row in enumerate(inp):
        unit_x = _pixel_coordinates_to_unit(x, height)
        width = len(row)

        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)

            # _stretch_square_to_disc returns a tuple on every path, so no
            # None check is needed before unpacking.
            u, v = _stretch_square_to_disc(unit_x, unit_y)

            source_x = math.floor(_one_coordinates_to_pixels(u, height))
            source_y = math.floor(_one_coordinates_to_pixels(v, width))

            # Only the lookup itself can raise IndexError, so only it is guarded.
            try:
                result[x][y] = inp[source_x][source_y]
            except IndexError:
                continue

    return result

# -- load and test

img = cv2.imread('circle.png')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later on the slice.
    raise FileNotFoundError("could not read image 'circle.png'")

# perf_counter is monotonic, so the interval cannot be skewed by system
# clock adjustments the way time.time() can.
elapsed = time.perf_counter()
squareImage = _transform(img[0:224, 0:224])
print(str(round((time.perf_counter() - elapsed) * 1000)) + ' ms to squareImage')

cv2.imshow('square', squareImage)
key = cv2.waitKey(0)  # block until a key is pressed

cv2.destroyAllWindows()

I expected to try to convert this code or part of this to anything faster, maybe CUDA to run directly on the GPU, or NUMBA, Cython, a library, etc...

英文:

I am new here,

Please, I need help converting part of this code into something faster while keeping it compatible with the Python programs that use this function. Is it possible to improve its speed?

On my system, a 224x224 color image takes 360 milliseconds to process.

#!/usr/bin/env python3
# coding: utf-8
# original code https://github.com/verhovsky/squircle/blob/master/squircle.py
import cv2
import math
import time
import numpy
_epsilon = 0.0000000001
def _sgn(x):
if x == 0.0:
return 0.0
if x &lt; 0:
return -1.0
return 1.0
def _pixel_coordinates_to_unit(coordinate, max_value):
return coordinate / max_value * 2 - 1
def _one_coordinates_to_pixels(coordinate, max_value):
return (coordinate + 1) / 2 * max_value
def _stretch_square_to_disc(x, y):
    """Scale a unit-square point radially so the square maps onto the disc.

    A point with ``|x|`` or ``|y|`` below ``_epsilon`` is returned unchanged.
    Any other point is multiplied by ``|d| / sqrt(x*x + y*y)``, where ``d``
    is ``x`` when ``x*x > y*y`` and ``y`` otherwise.

    Returns:
        The transformed ``(x, y)`` pair.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y

    x2 = x * x
    y2 = y * y
    hypotenuse_squared = x * x + y * y

    reciprocal_hypotenuse = 1.0 / math.sqrt(hypotenuse_squared)

    if x2 > y2:
        multiplier = _sgn(x) * x * reciprocal_hypotenuse
    else:
        multiplier = _sgn(y) * y * reciprocal_hypotenuse

    return x * multiplier, y * multiplier
def _transform(inp):
    """Resample ``inp`` through the square-to-disc coordinate mapping.

    For every destination pixel ``(x, y)`` the position is normalised to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to a pixel position, and the value found there in ``inp`` is copied.

    Args:
        inp: Array indexed as ``inp[x][y]`` (an image in the calling script).

    Returns:
        A new array shaped like ``inp``. A destination pixel whose source
        position cannot be indexed keeps the zero fill value.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)  # loop-invariant, so computed once

    for x, row in enumerate(inp):
        unit_x = _pixel_coordinates_to_unit(x, height)
        width = len(row)

        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)

            # _stretch_square_to_disc returns a tuple on every path, so no
            # None check is needed before unpacking.
            u, v = _stretch_square_to_disc(unit_x, unit_y)

            source_x = math.floor(_one_coordinates_to_pixels(u, height))
            source_y = math.floor(_one_coordinates_to_pixels(v, width))

            # Only the lookup itself can raise IndexError, so only it is guarded.
            try:
                result[x][y] = inp[source_x][source_y]
            except IndexError:
                continue

    return result
# -- load and test
img = cv2.imread('circle.png')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later on the slice.
    raise FileNotFoundError("could not read image 'circle.png'")

# perf_counter is monotonic, so the interval cannot be skewed by system
# clock adjustments the way time.time() can.
elapsed = time.perf_counter()
squareImage = _transform(img[0:224, 0:224])
print(str(round((time.perf_counter() - elapsed) * 1000)) + ' ms to squareImage')

cv2.imshow('square', squareImage)
key = cv2.waitKey(0)  # block until a key is pressed

cv2.destroyAllWindows()

I was hoping to convert this code, or part of it, into something faster — maybe CUDA to run directly on the GPU, or Numba, Cython, a library, etc.

答案1

得分: 2

我使用numba装饰了这些函数(删除了_sgn,使用了np.sign,删除了try..except - 是否需要?):

import math
import time

import cv2
import numpy
from numba import njit

_epsilon = 0.0000000001


@njit
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Rescale ``coordinate`` from the range ``[0, max_value]`` to ``[-1, 1]``."""
    fraction = coordinate / max_value
    return fraction * 2 - 1


@njit
def _one_coordinates_to_pixels(coordinate, max_value):
    """Rescale ``coordinate`` from the range ``[-1, 1]`` to ``[0, max_value]``."""
    shifted = coordinate + 1
    return shifted / 2 * max_value


@njit
def _stretch_square_to_disc(x, y):
    """Scale a unit-square point radially so the square maps onto the disc.

    A point with ``|x|`` or ``|y|`` below ``_epsilon`` is returned unchanged.
    Any other point is multiplied by ``|d| / sqrt(x*x + y*y)``, where ``d``
    is ``x`` when ``x*x > y*y`` and ``y`` otherwise.

    Returns:
        The transformed ``(x, y)`` pair.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y

    x2 = x * x
    y2 = y * y

    # This module does ``import numpy`` with no ``np`` alias, so the previous
    # ``np.sqrt`` / ``np.sign`` references were undefined names. ``math.sqrt``
    # is sufficient for a scalar argument.
    reciprocal_hypotenuse = 1.0 / math.sqrt(x2 + y2)

    if x2 > y2:
        multiplier = numpy.sign(x) * x * reciprocal_hypotenuse
    else:
        multiplier = numpy.sign(y) * y * reciprocal_hypotenuse

    return x * multiplier, y * multiplier


@njit
def _transform(inp):
    """Resample ``inp`` through the square-to-disc coordinate mapping.

    For every destination pixel ``(x, y)`` the position is normalised to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to a pixel position, and the value found there in ``inp`` is copied.

    Args:
        inp: Array indexed as ``inp[x][y]`` (an image in the calling script).

    Returns:
        A new array shaped like ``inp`` holding the resampled values.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)  # loop-invariant, so computed once

    for x, row in enumerate(inp):
        unit_x = _pixel_coordinates_to_unit(x, height)
        width = len(row)

        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)

            # _stretch_square_to_disc returns a tuple on every path, so no
            # None check is needed before unpacking.
            u, v = _stretch_square_to_disc(unit_x, unit_y)

            u = _one_coordinates_to_pixels(u, height)
            v = _one_coordinates_to_pixels(v, width)

            result[x][y] = inp[math.floor(u)][math.floor(v)]

    return result


# -- load and test

img = cv2.imread("circle.png")
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later on the slice.
    raise FileNotFoundError('could not read image "circle.png"')

# warm jit
# this is needed to let numba do the JIT optimizations
# if you run the function "cold", the running time will be larger

# you can use compile-ahead-of-time
# https://numba.pydata.org/numba-doc/dev/user/pycc.html

squareImage = _transform(img[0:224, 0:224])

# Time only the second call, after the warm-up call above.
elapsed = time.perf_counter_ns()
squareImage = _transform(img[0:224, 0:224])
print(str((time.perf_counter_ns() - elapsed) / 1000) + " us to squareImage")

cv2.imwrite("shashed.png", squareImage)

在我的电脑上(AMD 5700X),它打印出:

528.596 us to squareImage
# without using numba:
# 47928.774 us to squareImage

使用的图像:

circle.png

是的,可以将这段代码的一部分转换以提高速度吗?

结果

是的,可以将这段代码的一部分转换以提高速度吗?

英文:

I've used numba to decorate the functions (removed the _sgn and used np.sign, removed the try..except - is it needed?):

import math
import time

import cv2
import numpy
from numba import njit

_epsilon = 0.0000000001


@njit
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Rescale ``coordinate`` from the range ``[0, max_value]`` to ``[-1, 1]``."""
    fraction = coordinate / max_value
    return fraction * 2 - 1


@njit
def _one_coordinates_to_pixels(coordinate, max_value):
    """Rescale ``coordinate`` from the range ``[-1, 1]`` to ``[0, max_value]``."""
    shifted = coordinate + 1
    return shifted / 2 * max_value


@njit
def _stretch_square_to_disc(x, y):
    """Scale a unit-square point radially so the square maps onto the disc.

    A point with ``|x|`` or ``|y|`` below ``_epsilon`` is returned unchanged.
    Any other point is multiplied by ``|d| / sqrt(x*x + y*y)``, where ``d``
    is ``x`` when ``x*x > y*y`` and ``y`` otherwise.

    Returns:
        The transformed ``(x, y)`` pair.
    """
    if (abs(x) < _epsilon) or (abs(y) < _epsilon):
        return x, y

    x2 = x * x
    y2 = y * y

    # This module does ``import numpy`` with no ``np`` alias, so the previous
    # ``np.sqrt`` / ``np.sign`` references were undefined names. ``math.sqrt``
    # is sufficient for a scalar argument.
    reciprocal_hypotenuse = 1.0 / math.sqrt(x2 + y2)

    if x2 > y2:
        multiplier = numpy.sign(x) * x * reciprocal_hypotenuse
    else:
        multiplier = numpy.sign(y) * y * reciprocal_hypotenuse

    return x * multiplier, y * multiplier


@njit
def _transform(inp):
    """Resample ``inp`` through the square-to-disc coordinate mapping.

    For every destination pixel ``(x, y)`` the position is normalised to unit
    coordinates, passed through ``_stretch_square_to_disc``, converted back
    to a pixel position, and the value found there in ``inp`` is copied.

    Args:
        inp: Array indexed as ``inp[x][y]`` (an image in the calling script).

    Returns:
        A new array shaped like ``inp`` holding the resampled values.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)  # loop-invariant, so computed once

    for x, row in enumerate(inp):
        unit_x = _pixel_coordinates_to_unit(x, height)
        width = len(row)

        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)

            # _stretch_square_to_disc returns a tuple on every path, so no
            # None check is needed before unpacking.
            u, v = _stretch_square_to_disc(unit_x, unit_y)

            u = _one_coordinates_to_pixels(u, height)
            v = _one_coordinates_to_pixels(v, width)

            result[x][y] = inp[math.floor(u)][math.floor(v)]

    return result


# -- load and test

img = cv2.imread("circle.png")
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later on the slice.
    raise FileNotFoundError('could not read image "circle.png"')

# warm jit
# this is needed to let numba do the JIT optimizations
# if you run the function "cold", the running time will be larger

# you can use compile-ahead-of-time
# https://numba.pydata.org/numba-doc/dev/user/pycc.html

squareImage = _transform(img[0:224, 0:224])

# Time only the second call, after the warm-up call above.
elapsed = time.perf_counter_ns()
squareImage = _transform(img[0:224, 0:224])
print(str((time.perf_counter_ns() - elapsed) / 1000) + " us to squareImage")

cv2.imwrite("shashed.png", squareImage)

On my computer (AMD 5700X) it prints:

528.596 us to squareImage
# without using numba:
# 47928.774 us to squareImage

Images used:

circle.png

是的,可以将这段代码的一部分转换以提高速度吗?

the result

是的,可以将这段代码的一部分转换以提高速度吗?


huangapple
  • 本文由 发表于 2023年8月5日 03:17:12
  • 转载请务必保留本文链接:https://go.coder-hub.com/76838644.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定